CVPR 2026 Open Access Repository

Papers

Back
Generalizable Structure-Aware Keypoint Correspondence for Category-Unified 3D Single Object Tracking: Jie Xiao,

Yinchao Ma,

Yuyang Tang,

Dengqing Yang,

Jianpeng Yang,

Xu Zhou,

Qiao Li,

Wenfei Yang,

Tianzhu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Jie and Ma, Yinchao and Tang, Yuyang and Yang, Dengqing and Yang, Jianpeng and Zhou, Xu and Li, Qiao and Yang, Wenfei and Zhang, Tianzhu},
  title     = {Generalizable Structure-Aware Keypoint Correspondence for Category-Unified 3D Single Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28156--28166},
}
DirectFisheye-GS: Enabling Native Fisheye Input in Gaussian Splatting with Cross-View Joint Optimization: Zhengxian Yang,

Fei Xie,

Xutao Xue,

Rui Zhang,

Taicheng Huang,

Yang Liu,

Mengqi Ji,

Tao Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zhengxian and Xie, Fei and Xue, Xutao and Zhang, Rui and Huang, Taicheng and Liu, Yang and Ji, Mengqi and Yu, Tao},
  title     = {DirectFisheye-GS: Enabling Native Fisheye Input in Gaussian Splatting with Cross-View Joint Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4942--4952},
}
CompBench: Benchmarking Complex Instruction-guided Image Editing: Bohan Jia,

Wenxuan Huang,

Yuntian Tang,

Junbo Qiao,

Jincheng Liao,

Shaosheng Cao,

Fei Zhao,

Zhaopeng Feng,

Zhouhong Gu,

Zhenfei Yin,

Lei Bai,

Wanli Ouyang,

Lin Chen,

Fei Zhao,

Zihan Wang,

Yuan Xie,

Shaohui Lin; [pdf] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Bohan and Huang, Wenxuan and Tang, Yuntian and Qiao, Junbo and Liao, Jincheng and Cao, Shaosheng and Zhao, Fei and Feng, Zhaopeng and Gu, Zhouhong and Yin, Zhenfei and Bai, Lei and Ouyang, Wanli and Chen, Lin and Zhao, Fei and Wang, Zihan and Xie, Yuan and Lin, Shaohui},
  title     = {CompBench: Benchmarking Complex Instruction-guided Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1112--1122},
}
Choreographing a World of Dynamic Objects: Yanzhe Lyu,

Chen Geng,

Karthik Dharmarajan,

Yunzhi Zhang,

Hadi Alzayer,

Shangzhe Wu,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lyu_2026_CVPR,
  author    = {Lyu, Yanzhe and Geng, Chen and Dharmarajan, Karthik and Zhang, Yunzhi and Alzayer, Hadi and Wu, Shangzhe and Wu, Jiajun},
  title     = {Choreographing a World of Dynamic Objects},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32647--32658},
}
Spk2VidNet: A Hierarchical Recurrent Architecture for High-Fidelity Video Reconstruction from Long Spike-Camera Streams: Yuanlin Wang,

Ruiqin Xiong,

Jiyu Xie,

Zhenkun Zhu,

Zhaofei Yu,

Xiaopeng Fan,

Tiejun Huang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuanlin and Xiong, Ruiqin and Xie, Jiyu and Zhu, Zhenkun and Yu, Zhaofei and Fan, Xiaopeng and Huang, Tiejun},
  title     = {Spk2VidNet: A Hierarchical Recurrent Architecture for High-Fidelity Video Reconstruction from Long Spike-Camera Streams},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12140--12149},
}
Continual Distillation of Teachers from Different Domains: Nicolas Michel,

Maorong Wang,

Jiangpeng He,

Toshihiko Yamasaki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Michel_2026_CVPR,
  author    = {Michel, Nicolas and Wang, Maorong and He, Jiangpeng and Yamasaki, Toshihiko},
  title     = {Continual Distillation of Teachers from Different Domains},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10810--10819},
}
GT-SVJ: Generative-Transformer-Based Self-Supervised Video Judge For Efficient Video Reward Modeling: Shivanshu Shekhar,

Uttaran Bhattacharya,

Raghavendra Addanki,

Mehrab Tanjim,

Somdeb Sarkhel,

Tong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shekhar_2026_CVPR,
  author    = {Shekhar, Shivanshu and Bhattacharya, Uttaran and Addanki, Raghavendra and Tanjim, Mehrab and Sarkhel, Somdeb and Zhang, Tong},
  title     = {GT-SVJ: Generative-Transformer-Based Self-Supervised Video Judge For Efficient Video Reward Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9847--9858},
}
Beyond Euclidean Gossip: KL-Barycentric Consensus on Heterogeneous and Imbalanced Images: Lu Xu,

Guosheng Yin; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Lu and Yin, Guosheng},
  title     = {Beyond Euclidean Gossip: KL-Barycentric Consensus on Heterogeneous and Imbalanced Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6167--6175},
}
HybridDriveVLA: Vision-Language-Action Model with Visual CoT reasoning and ToT Evaluation for Autonomous Driving: Yipene Cedric Francois Bassole,

Sungwoo Kim,

Jiwoo Jung,

Yunsick Sung; [pdf] [supp]
[bibtex]
@InProceedings{Bassole_2026_CVPR,
  author    = {Bassole, Yipene Cedric Francois and Kim, Sungwoo and Jung, Jiwoo and Sung, Yunsick},
  title     = {HybridDriveVLA: Vision-Language-Action Model with Visual CoT reasoning and ToT Evaluation for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32421--32430},
}
Training-free, Perceptually Consistent Low-Resolution Previews with High-Resolution Image for Efficient Workflows of Diffusion Models: Wongi Jeong,

Hoigi Seo,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Wongi and Seo, Hoigi and Chun, Se Young},
  title     = {Training-free, Perceptually Consistent Low-Resolution Previews with High-Resolution Image for Efficient Workflows of Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4547--4557},
}
Catalyst4D: High-Fidelity 3D-to-4D Scene Editing via Dynamic Propagation: Shifeng Chen,

Yihui Li,

Jun Liao,

Hongyu Yang,

Di Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Shifeng and Li, Yihui and Liao, Jun and Yang, Hongyu and Huang, Di},
  title     = {Catalyst4D: High-Fidelity 3D-to-4D Scene Editing via Dynamic Propagation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29792--29802},
}
Cloning Deterministic Worlds: The Critical Role of Latent Geometry in Long-Horizon World Models: Zaishuo Xia,

Yukuan Lu,

Xinyi Li,

Yifan Xu,

Yubei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Zaishuo and Lu, Yukuan and Li, Xinyi and Xu, Yifan and Chen, Yubei},
  title     = {Cloning Deterministic Worlds: The Critical Role of Latent Geometry in Long-Horizon World Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32602--32612},
}
CME-CAD: Heterogeneous Collaborative Multi-Expert Reinforcement Learning for CAD Code Generation: Ke Niu,

Haiyang Yu,

Zhuofan Chen,

Zhengtao Yao,

Weitao Jia,

Xiaodong Ge,

Jingqun Tang,

Benlei Cui,

Bin Li,

Xiangyang Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Niu_2026_CVPR,
  author    = {Niu, Ke and Yu, Haiyang and Chen, Zhuofan and Yao, Zhengtao and Jia, Weitao and Ge, Xiaodong and Tang, Jingqun and Cui, Benlei and Li, Bin and Xue, Xiangyang},
  title     = {CME-CAD: Heterogeneous Collaborative Multi-Expert Reinforcement Learning for CAD Code Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39272--39281},
}
Unique Lives, Shared World: Learning from Single-Life Videos: Tengda Han,

Sayna Ebrahimi,

Dilara Gokay,

Li Yang Ku,

Maks Ovsjanikov,

Iva Babukova,

Daniel Zoran,

Viorica Patraucean,

Joao Carreira,

Andrew Zisserman,

Dima Damen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Tengda and Ebrahimi, Sayna and Gokay, Dilara and Ku, Li Yang and Ovsjanikov, Maks and Babukova, Iva and Zoran, Daniel and Patraucean, Viorica and Carreira, Joao and Zisserman, Andrew and Damen, Dima},
  title     = {Unique Lives, Shared World: Learning from Single-Life Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24021--24030},
}
Quantized Residuals to Continuous Prompts for Few-Shot Class Incremental Learning in Vision-Language Models: Abhishek Kumar Sinha,

Nitant Dube,

Soma Biswas; [pdf] [supp]
[bibtex]
@InProceedings{Sinha_2026_CVPR,
  author    = {Sinha, Abhishek Kumar and Dube, Nitant and Biswas, Soma},
  title     = {Quantized Residuals to Continuous Prompts for Few-Shot Class Incremental Learning in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3856--3865},
}
EcoSplat: Efficiency-controllable Feed-forward 3D Gaussian Splatting from Multi-view Images: Minh-Quan Viet Bui,

Jongmin Park,

Juan Luis Gonzalez,

Jaeho Moon,

Jihyong Oh,

Munchurl Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bui_2026_CVPR,
  author    = {Bui, Minh-Quan Viet and Park, Jongmin and Gonzalez, Juan Luis and Moon, Jaeho and Oh, Jihyong and Kim, Munchurl},
  title     = {EcoSplat: Efficiency-controllable Feed-forward 3D Gaussian Splatting from Multi-view Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26010--26020},
}
UniComp: Rethinking Video Compression Through Informational Uniqueness: Chao Yuan,

Shimin Chen,

Minliang Lin,

Limeng Qiao,

Guanglu Wan,

Lin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Chao and Chen, Shimin and Lin, Minliang and Qiao, Limeng and Wan, Guanglu and Ma, Lin},
  title     = {UniComp: Rethinking Video Compression Through Informational Uniqueness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18609--18618},
}
AD-GBC: Anisotropic Granular-Ball Skip-Connection Refiner for UNet-Based Medical Image Segmentation: Xiya Shen,

Qinglin Zhao,

Li Feng; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Xiya and Zhao, Qinglin and Feng, Li},
  title     = {AD-GBC: Anisotropic Granular-Ball Skip-Connection Refiner for UNet-Based Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1418--1427},
}
White-Balance First, Adjust Later: Cross-Camera Color Constancy via Vision-Language Evaluation: Shuwei Li,

Lei Tan,

Robby T. Tan; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Shuwei and Tan, Lei and Tan, Robby T.},
  title     = {White-Balance First, Adjust Later: Cross-Camera Color Constancy via Vision-Language Evaluation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1331--1341},
}
Reallocating Attention Across Layers to Reduce Multimodal Hallucination: Haolang Lu,

Bolun Chu,

WeiYe Fu,

Guoshun Nan,

Junning Liu,

Minghui Pan,

Qiankun Li,

Yi Yu,

Hua Wang,

Kun Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Haolang and Chu, Bolun and Fu, WeiYe and Nan, Guoshun and Liu, Junning and Pan, Minghui and Li, Qiankun and Yu, Yi and Wang, Hua and Wang, Kun},
  title     = {Reallocating Attention Across Layers to Reduce Multimodal Hallucination},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4157--4167},
}
PanoEnv: Exploring 3D Spatial Intelligence in Panoramic Environments with Reinforcement Learning: Zekai Lin,

Xu Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Zekai and Zheng, Xu},
  title     = {PanoEnv: Exploring 3D Spatial Intelligence in Panoramic Environments with Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9647--9657},
}
Erasing Thousands of Concepts: Towards Scalable and Practical Concept Erasure for Text-to-Image Diffusion Models: Hoigi Seo,

Byung Hyun Lee,

Jaehyun Cho,

Sungjin Lim,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Hoigi and Lee, Byung Hyun and Cho, Jaehyun and Lim, Sungjin and Chun, Se Young},
  title     = {Erasing Thousands of Concepts: Towards Scalable and Practical Concept Erasure for Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10262--10272},
}
TALON: Test-time Adaptive Learning for On-the-Fly Category Discovery: Yanan Wu,

Yuhan Yan,

Tailai Chen,

Zhixiang Chi,

ZiZhang Wu,

Yi Jin,

Yang Wang,

Zhenbo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yanan and Yan, Yuhan and Chen, Tailai and Chi, Zhixiang and Wu, ZiZhang and Jin, Yi and Wang, Yang and Li, Zhenbo},
  title     = {TALON: Test-time Adaptive Learning for On-the-Fly Category Discovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22259--22269},
}
SenseSearch: Empowering Vision-Language Models with High-Resolution Agentic Search-Reasoning via Reinforcement Learning: Yong Xien Chng,

Tao Hu,

Wenwen Tong,

Xueheng Li,

Jiandong Chen,

Haojia Yu,

Jiefan Lu,

Hewei Guo,

Hanming Deng,

Chengjun Xie,

Gao Huang,

Lewei Lu; [pdf] [supp]
[bibtex]
@InProceedings{Chng_2026_CVPR,
  author    = {Chng, Yong Xien and Hu, Tao and Tong, Wenwen and Li, Xueheng and Chen, Jiandong and Yu, Haojia and Lu, Jiefan and Guo, Hewei and Deng, Hanming and Xie, Chengjun and Huang, Gao and Lu, Lewei},
  title     = {SenseSearch: Empowering Vision-Language Models with High-Resolution Agentic Search-Reasoning via Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26508--26517},
}
REArtGS++: Generalizable Articulation Reconstruction with Temporal Geometry Constraint via Planar Gaussian Splatting: Di Wu,

Liu Liu,

Anran Huang,

Yuyan Liu,

Qiaojun Yu,

Shaofan Liu,

Liangtu Song,

Cewu Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Di and Liu, Liu and Huang, Anran and Liu, Yuyan and Yu, Qiaojun and Liu, Shaofan and Song, Liangtu and Lu, Cewu},
  title     = {REArtGS++: Generalizable Articulation Reconstruction with Temporal Geometry Constraint via Planar Gaussian Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1177--1186},
}
Quant Experts: Token-aware Adaptive Error Reconstruction with Mixture of Experts for Large Vision-Language Models Quantization: Chenwei Jia,

Baoting Li,

Xuchong Zhang,

Mingzhuo Wei,

Bochen Lin,

Hongbin Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Chenwei and Li, Baoting and Zhang, Xuchong and Wei, Mingzhuo and Lin, Bochen and Sun, Hongbin},
  title     = {Quant Experts: Token-aware Adaptive Error Reconstruction with Mixture of Experts for Large Vision-Language Models Quantization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24716--24726},
}
Efficient and High-Fidelity Omni Modality Retrieval: Chuong Huynh,

Manh Luong,

Abhinav Shrivastava; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huynh_2026_CVPR,
  author    = {Huynh, Chuong and Luong, Manh and Shrivastava, Abhinav},
  title     = {Efficient and High-Fidelity Omni Modality Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8770--8780},
}
SynCLIP: Synonym-Coherent Language-Image Pretraining for Robust Open-Vocabulary Dense Perception: Mingjie Xie,

Guangjun He,

Dongli Xu,

Youtian Lin,

Hongjue Li,

Pengming Feng,

Jian Guan,

Yue Deng; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Mingjie and He, Guangjun and Xu, Dongli and Lin, Youtian and Li, Hongjue and Feng, Pengming and Guan, Jian and Deng, Yue},
  title     = {SynCLIP: Synonym-Coherent Language-Image Pretraining for Robust Open-Vocabulary Dense Perception},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31524--31533},
}
Dr.Occ: Depth- and Region-Guided 3D Occupancy from Surround-View Cameras for Autonomous Driving: Xubo Zhu,

Haoyang Zhang,

Fei He,

Rui Wu,

Yanhu Shan,

Wen Yang,

Huai Yu; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Xubo and Zhang, Haoyang and He, Fei and Wu, Rui and Shan, Yanhu and Yang, Wen and Yu, Huai},
  title     = {Dr.Occ: Depth- and Region-Guided 3D Occupancy from Surround-View Cameras for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28610--28619},
}
More than the Sum: Panorama-Language Models for Adverse Omni-Scenes: Weijia Fan,

Ruiping Liu,

Jiale Wei,

Yufan Chen,

Junwei Zheng,

Zichao Zeng,

Jiaming Zhang,

Qiufu Li,

Linlin Shen,

Rainer Stiefelhagen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Weijia and Liu, Ruiping and Wei, Jiale and Chen, Yufan and Zheng, Junwei and Zeng, Zichao and Zhang, Jiaming and Li, Qiufu and Shen, Linlin and Stiefelhagen, Rainer},
  title     = {More than the Sum: Panorama-Language Models for Adverse Omni-Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30874--30884},
}
AdaSpot: Spend Resolution Where It Matters for Precise Event Spotting: Artur Xarles,

Sergio Escalera,

Thomas B. Moeslund,

Albert Clapés; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xarles_2026_CVPR,
  author    = {Xarles, Artur and Escalera, Sergio and Moeslund, Thomas B. and Clap{\'e}s, Albert},
  title     = {AdaSpot: Spend Resolution Where It Matters for Precise Event Spotting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24010--24020},
}
Prompt-Anchored Vision-Text Distillation for Lifelong Person Re-identification: Wen Wen,

Hao Chen,

Shiliang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Wen and Chen, Hao and Zhang, Shiliang},
  title     = {Prompt-Anchored Vision-Text Distillation for Lifelong Person Re-identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18503--18512},
}
What Is the Optimal Ranking Score Between Precision and Recall? We Can Always Find It and It Is Rarely F1: Sébastien Piérard,

Adrien Deliège,

Marc Van Droogenbroeck; [pdf] [supp]
[bibtex]
@InProceedings{Pierard_2026_CVPR,
  author    = {Pi{\'e}rard, S{\'e}bastien and Deli{\`e}ge, Adrien and Van Droogenbroeck, Marc},
  title     = {What Is the Optimal Ranking Score Between Precision and Recall? We Can Always Find It and It Is Rarely F1},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9722--9731},
}
From Spots to Pixels: Dense Spatial Gene Expression Prediction from Histology Images: Ruikun Zhang,

Yan Yang,

Liyuan Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Ruikun and Yang, Yan and Pan, Liyuan},
  title     = {From Spots to Pixels: Dense Spatial Gene Expression Prediction from Histology Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19791--19800},
}
Rethinking Pose Refinement in 3D Gaussian Splatting under Pose Prior and Geometric Uncertainty: Mangyu Kong,

Jaewon Lee,

Seongwon Lee,

Euntai Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Mangyu and Lee, Jaewon and Lee, Seongwon and Kim, Euntai},
  title     = {Rethinking Pose Refinement in 3D Gaussian Splatting under Pose Prior and Geometric Uncertainty},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25958--25968},
}
RetouchIQ: MLLM Agents for Instruction-Based Image Retouching with Generalist Reward: Qiucheng Wu,

Jing Shi,

Simon Jenni,

Kushal Kafle,

Tianyu Wang,

Shiyu Chang,

Handong Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Qiucheng and Shi, Jing and Jenni, Simon and Kafle, Kushal and Wang, Tianyu and Chang, Shiyu and Zhao, Handong},
  title     = {RetouchIQ: MLLM Agents for Instruction-Based Image Retouching with Generalist Reward},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12279--12288},
}
Anomaly-Related Residual Fields for Cross-domain Anomaly Detection: Kewei Gao,

Jiayi Xie,

Zhengda Shen,

Weijun Qin,

Lingxiang Jia,

Kejia Chen,

Zunlei Feng,

Yijun Bei; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Kewei and Xie, Jiayi and Shen, Zhengda and Qin, Weijun and Jia, Lingxiang and Chen, Kejia and Feng, Zunlei and Bei, Yijun},
  title     = {Anomaly-Related Residual Fields for Cross-domain Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35617--35627},
}
DreamSR: Towards Ultra-High-Resolution Image Super-Resolution via a Receptive-Field Enhanced Diffusion Transformer: Qingji Dong,

Hang Dong,

Mingqin Chen,

Rui Zhang,

Yitong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Qingji and Dong, Hang and Chen, Mingqin and Zhang, Rui and Wang, Yitong},
  title     = {DreamSR: Towards Ultra-High-Resolution Image Super-Resolution via a Receptive-Field Enhanced Diffusion Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38258--38269},
}
LongStream: Long-Sequence Streaming Autoregressive Visual Geometry: Chong Cheng,

Xianda Chen,

Tao Xie,

Wei Yin,

Weiqiang Ren,

Qian Zhang,

Xiaoyang Guo,

Hao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Chong and Chen, Xianda and Xie, Tao and Yin, Wei and Ren, Weiqiang and Zhang, Qian and Guo, Xiaoyang and Wang, Hao},
  title     = {LongStream: Long-Sequence Streaming Autoregressive Visual Geometry},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {272--283},
}
VisRef: Visual Refocusing while Thinking Improves Test-Time Scaling in Multi-Modal Large Reasoning Models: Soumya Suvra Ghosal,

Youngeun Kim,

Zhuowei Li,

Ritwick Chaudhry,

Linghan Xu,

Hongjing Zhang,

Jakub Zablocki,

Yifan Xing,

Qin Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghosal_2026_CVPR,
  author    = {Ghosal, Soumya Suvra and Kim, Youngeun and Li, Zhuowei and Chaudhry, Ritwick and Xu, Linghan and Zhang, Hongjing and Zablocki, Jakub and Xing, Yifan and Zhang, Qin},
  title     = {VisRef: Visual Refocusing while Thinking Improves Test-Time Scaling in Multi-Modal Large Reasoning Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33404--33414},
}
Enhancing the Security of Visual Speaker Authentication Based on Dynamic Lip-Print Analysis: Yi He,

Lei Yang,

Bofan Chen,

Shilin Wang; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yi and Yang, Lei and Chen, Bofan and Wang, Shilin},
  title     = {Enhancing the Security of Visual Speaker Authentication Based on Dynamic Lip-Print Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35462--35471},
}
Ultra Diffusion Poser: Diffusion-Based Human Motion Tracking from Sparse Inertial Sensors and Ranging-based Between-sensor Distances: Dominik Hollidt,

Tommaso Bendinelli,

Christian Holz; [pdf] [supp]
[bibtex]
@InProceedings{Hollidt_2026_CVPR,
  author    = {Hollidt, Dominik and Bendinelli, Tommaso and Holz, Christian},
  title     = {Ultra Diffusion Poser: Diffusion-Based Human Motion Tracking from Sparse Inertial Sensors and Ranging-based Between-sensor Distances},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7036--7046},
}
Beyond Mimicry: Learning Whole-Body Human-Humanoid Interaction from Human-Human Demonstrations: Wei-Jin Huang,

Yue-Yi Zhang,

Yi-Lin Wei,

Zhi-Wei Xia,

Juantao Tan,

Yuan-Ming Li,

Zhilin Zhao,

Wei-Shi Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Wei-Jin and Zhang, Yue-Yi and Wei, Yi-Lin and Xia, Zhi-Wei and Tan, Juantao and Li, Yuan-Ming and Zhao, Zhilin and Zheng, Wei-Shi},
  title     = {Beyond Mimicry: Learning Whole-Body Human-Humanoid Interaction from Human-Human Demonstrations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30740--30749},
}
Pluggable Pruning with Contiguous Layer Distillation for Diffusion Transformers: Jian Ma,

Qirong Peng,

Xujie Zhu,

Peixing Xie,

Chen Chen,

Haonan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Jian and Peng, Qirong and Zhu, Xujie and Xie, Peixing and Chen, Chen and Lu, Haonan},
  title     = {Pluggable Pruning with Contiguous Layer Distillation for Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18640--18650},
}
PersonaVLM: Long-Term Personalized Multimodal LLMs: Chang Nie,

Chaoyou Fu,

Yifan Zhang,

Haihua Yang,

Caifeng Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nie_2026_CVPR,
  author    = {Nie, Chang and Fu, Chaoyou and Zhang, Yifan and Yang, Haihua and Shan, Caifeng},
  title     = {PersonaVLM: Long-Term Personalized Multimodal LLMs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15000--15009},
}
APPO: Attention-guided Perception Policy Optimization for Video Reasoning: Henghui Du,

Chang Zhou,

Xi Chen,

Di Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Henghui and Zhou, Chang and Chen, Xi and Hu, Di},
  title     = {APPO: Attention-guided Perception Policy Optimization for Video Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12269--12278},
}
An Efficient Token Compression Framework for Visual Object Tracking: Weijing Wu,

Qihua Liang,

Bineng Zhong,

Haiying Xia,

Zhiyi Mo,

Shuxiang Song; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Weijing and Liang, Qihua and Zhong, Bineng and Xia, Haiying and Mo, Zhiyi and Song, Shuxiang},
  title     = {An Efficient Token Compression Framework for Visual Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6857--6867},
}
GraphVLM: Benchmarking Vision Language Models for Multimodal Graph Learning: Jiajin Liu,

Dongzhe Fan,

Chuanhao Ji,

Daochen Zha,

Qiaoyu Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jiajin and Fan, Dongzhe and Ji, Chuanhao and Zha, Daochen and Tan, Qiaoyu},
  title     = {GraphVLM: Benchmarking Vision Language Models for Multimodal Graph Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9491--9500},
}
Boundary-Responsive Differentiable Gating for Superpixel-Based Segmentation: Fatmaelzahraa Ahmed,

Zhihe Lu,

Gianni Caro,

Diram Tabaa,

Mohamed Hamdy,

Muraam Abdel-Ghani,

Abdulaziz Al-Ali,

Muhammad Arsalan,

Shidin Balakrishnan; [pdf] [supp]
[bibtex]
@InProceedings{Ahmed_2026_CVPR,
  author    = {Ahmed, Fatmaelzahraa and Lu, Zhihe and Caro, Gianni and Tabaa, Diram and Hamdy, Mohamed and Abdel-Ghani, Muraam and Al-Ali, Abdulaziz and Arsalan, Muhammad and Balakrishnan, Shidin},
  title     = {Boundary-Responsive Differentiable Gating for Superpixel-Based Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42137--42146},
}
Rethinking Asymmetric Quantization: Hidden Symmetry in Vision Model Weights: Masafumi Mori,

Shinya Gongyo,

Mitsuru Ambai; [pdf] [supp]
[bibtex]
@InProceedings{Mori_2026_CVPR,
  author    = {Mori, Masafumi and Gongyo, Shinya and Ambai, Mitsuru},
  title     = {Rethinking Asymmetric Quantization: Hidden Symmetry in Vision Model Weights},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33609--33620},
}
Hierarchical Enhancement of Semantic Priors for Disentangled Text-Driven Motion Generation: Wenhan Lv,

Shaopan Wang,

Xiangyu Wu,

Tianchu Hang,

Zhongquan Jian,

Qingqiang Wu; [pdf] [supp]
[bibtex]
@InProceedings{Lv_2026_CVPR,
  author    = {Lv, Wenhan and Wang, Shaopan and Wu, Xiangyu and Hang, Tianchu and Jian, Zhongquan and Wu, Qingqiang},
  title     = {Hierarchical Enhancement of Semantic Priors for Disentangled Text-Driven Motion Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14844--14853},
}
MovieRecapsQA: A Multimodal Open-Ended Video Question-Answering Benchmark: Shaden Shaar,

Bradon Thymes,

Sirawut Chaixanien,

Claire Cardie,

Bharath Hariharan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shaar_2026_CVPR,
  author    = {Shaar, Shaden and Thymes, Bradon and Chaixanien, Sirawut and Cardie, Claire and Hariharan, Bharath},
  title     = {MovieRecapsQA: A Multimodal Open-Ended Video Question-Answering Benchmark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4537--4546},
}
PhysHead: Simulation-Ready Gaussian Head Avatars: Berna Kabadayi,

Vanessa Sklyarova,

Wojciech Zielonka,

Justus Thies,

Gerard Pons-Moll; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kabadayi_2026_CVPR,
  author    = {Kabadayi, Berna and Sklyarova, Vanessa and Zielonka, Wojciech and Thies, Justus and Pons-Moll, Gerard},
  title     = {PhysHead: Simulation-Ready Gaussian Head Avatars},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4109--4121},
}
Scalable Object Relation Encoding for Better 3D Spatial Reasoning in Large Language Models: Shengli Zhou,

Minghang Zheng,

Feng Zheng,

Yang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Shengli and Zheng, Minghang and Zheng, Feng and Liu, Yang},
  title     = {Scalable Object Relation Encoding for Better {3D} Spatial Reasoning in Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16758--16767},
}
Physical Simulator In-the-Loop Video Generation: Lin Geng Foo,

Mark He Huang,

Alexandros Lattas,

Stylianos Moschoglou,

Thabo Beeler,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Foo_2026_CVPR,
  author    = {Foo, Lin Geng and Huang, Mark He and Lattas, Alexandros and Moschoglou, Stylianos and Beeler, Thabo and Theobalt, Christian},
  title     = {Physical Simulator In-the-Loop Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4301--4311},
}
Multimodal Protein Language Models for Enzyme Kinetic Parameters: From Substrate Recognition to Conformational Adaptation: Fei Wang,

Xinye Zheng,

Kun Li,

Yanyan Wei,

Yuxin Liu,

Ganpeng Hu,

Tong Bao,

Jingwen Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Fei and Zheng, Xinye and Li, Kun and Wei, Yanyan and Liu, Yuxin and Hu, Ganpeng and Bao, Tong and Yang, Jingwen},
  title     = {Multimodal Protein Language Models for Enzyme Kinetic Parameters: From Substrate Recognition to Conformational Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15829--15839},
}
PR-IQA: Partial-Reference Image Quality Assessment for Diffusion-Based Novel View Synthesis: Inseong Choi,

Siwoo Lee,

Seung-Hun Nam,

Soohwan Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Inseong and Lee, Siwoo and Nam, Seung-Hun and Song, Soohwan},
  title     = {{PR-IQA}: Partial-Reference Image Quality Assessment for Diffusion-Based Novel View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37301--37310},
}
FloVerse: Floor Plan-Guided Multi-Modal Navigation: Weiqi Huang,

Shuangyi Dong,

Jiaxin Li,

Yifei Guo,

Zan Wang,

Wei Liang; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Weiqi and Dong, Shuangyi and Li, Jiaxin and Guo, Yifei and Wang, Zan and Liang, Wei},
  title     = {{FloVerse}: Floor Plan-Guided Multi-Modal Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15156--15165},
}
ConceptPose: Training-Free Zero-Shot Object Pose Estimation using Concept Vectors: Liming Kuang,

Yordanka Velikova,

Mahdi Saleh,

Jan-Nico Zaech,

Danda Pani Paudel,

Benjamin Busam; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kuang_2026_CVPR,
  author    = {Kuang, Liming and Velikova, Yordanka and Saleh, Mahdi and Zaech, Jan-Nico and Paudel, Danda Pani and Busam, Benjamin},
  title     = {{ConceptPose}: Training-Free Zero-Shot Object Pose Estimation using Concept Vectors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26582--26592},
}
JoPPO: Hierarchical Photography Assessment via Contrastive Joint Conditional Probabilistic Reinforcement Learning: Yifan Yang,

Juntuo Wang,

Yuming Qiao,

Xudong Zhang,

Chunyang Yu,

Yan Li,

Xiao Lin,

Liang Luo,

Dan Meng; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yifan and Wang, Juntuo and Qiao, Yuming and Zhang, Xudong and Yu, Chunyang and Li, Yan and Lin, Xiao and Luo, Liang and Meng, Dan},
  title     = {{JoPPO}: Hierarchical Photography Assessment via Contrastive Joint Conditional Probabilistic Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11684--11693},
}
Fine-VAD: Towards Fine-Grained Video Anomaly Detection via Progressive Cross-Granularity Learning: Menghao Zhang,

Yiyan Zhu,

Pengfei Ren,

Haifeng Sun,

Qi Qi,

Zirui Zhuang,

Huazheng Wang,

Lei Zhang,

Jianxin Liao,

Jingyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Menghao and Zhu, Yiyan and Ren, Pengfei and Sun, Haifeng and Qi, Qi and Zhuang, Zirui and Wang, Huazheng and Zhang, Lei and Liao, Jianxin and Wang, Jingyu},
  title     = {{Fine-VAD}: Towards Fine-Grained Video Anomaly Detection via Progressive Cross-Granularity Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35514--35523},
}
Functional Mean Flow in Hilbert Space: Zhiqi Li,

Yuchen Sun,

Greg Turk,

Bo Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhiqi and Sun, Yuchen and Turk, Greg and Zhu, Bo},
  title     = {Functional Mean Flow in {Hilbert} Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1928--1938},
}
CausalVAD: De-confounding End-to-End Autonomous Driving via Causal Intervention: Jiacheng Tang,

Zhiyuan Zhou,

Zhuolin He,

Jia Zhang,

Kai Zhang,

Jian Pu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Jiacheng and Zhou, Zhiyuan and He, Zhuolin and Zhang, Jia and Zhang, Kai and Pu, Jian},
  title     = {{CausalVAD}: De-confounding End-to-End Autonomous Driving via Causal Intervention},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32124--32133},
}
AdaBet: Gradient-free Layer Selection for Efficient Training of Deep Neural Networks: Irene Tenison,

Soumyajit Chatterjee,

Fahim Kawsar,

Mohammad Malekzadeh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tenison_2026_CVPR,
  author    = {Tenison, Irene and Chatterjee, Soumyajit and Kawsar, Fahim and Malekzadeh, Mohammad},
  title     = {{AdaBet}: Gradient-free Layer Selection for Efficient Training of Deep Neural Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20233--20242},
}
Dual Ascent Diffusion for Inverse Problems: Minseo Kim,

Axel Levy,

Gordon Wetzstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Minseo and Levy, Axel and Wetzstein, Gordon},
  title     = {Dual Ascent Diffusion for Inverse Problems},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23622--23631},
}
One Patch to Caption Them All: A Unified Zero-Shot Captioning Framework: Lorenzo Bianchi,

Giacomo Pacini,

Fabio Carrara,

Nicola Messina,

Giuseppe Amato,

Fabrizio Falchi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bianchi_2026_CVPR,
  author    = {Bianchi, Lorenzo and Pacini, Giacomo and Carrara, Fabio and Messina, Nicola and Amato, Giuseppe and Falchi, Fabrizio},
  title     = {One Patch to Caption Them All: A Unified Zero-Shot Captioning Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5532--5542},
}
ReGenHOI: Unifying Reconstruction and Generation for 3D Human-Object Interaction Understanding: Miao Xu,

Xiangyu Zhu,

Zidu Wang,

Xusheng Liang,

Bao Li,

Jinlin Wu,

Zelin Zang,

Zhen Lei; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Miao and Zhu, Xiangyu and Wang, Zidu and Liang, Xusheng and Li, Bao and Wu, Jinlin and Zang, Zelin and Lei, Zhen},
  title     = {{ReGenHOI}: Unifying Reconstruction and Generation for {3D} Human-Object Interaction Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42847--42857},
}
Masked Region Transformer for Layered Image Generation and Editing at Scale: Zhicong Tang,

Jingye Chen,

Zhao Zhang,

Mohan Zhou,

Yuchi Liu,

Yifan Pu,

Yalong Bai,

Ethan Smith,

Yuhui Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Zhicong and Chen, Jingye and Zhang, Zhao and Zhou, Mohan and Liu, Yuchi and Pu, Yifan and Bai, Yalong and Smith, Ethan and Yuan, Yuhui},
  title     = {Masked Region Transformer for Layered Image Generation and Editing at Scale},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40622--40632},
}
Grounded 3D-Aware Spatial Vision-Language Modeling: An-Chieh Cheng,

Yang Fu,

Yatai Ji,

Ligeng Zhu,

Guanqi Zhan,

Zhuoyang Zhang,

Zhaojing Yang,

Song Han,

Yao Lu,

Pavlo Molchanov,

Vidya Nariyambut Murali,

Jan Kautz,

Xiaolong Wang,

Hongxu Yin,

Sifei Liu; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, An-Chieh and Fu, Yang and Ji, Yatai and Zhu, Ligeng and Zhan, Guanqi and Zhang, Zhuoyang and Yang, Zhaojing and Han, Song and Lu, Yao and Molchanov, Pavlo and Murali, Vidya Nariyambut and Kautz, Jan and Wang, Xiaolong and Yin, Hongxu and Liu, Sifei},
  title     = {Grounded {3D}-Aware Spatial Vision-Language Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16688--16700},
}
Fine-Grained Post-Training Quantization for Large Vision Language Models with Quantization-Aware Integrated Gradients: Ziwei Xiang,

Fanhu Zeng,

Hongjian Fang,

Rui-Qi Wang,

Renxing Chen,

Yanan Zhu,

Yi Chen,

Peipei Yang,

Xu-Yao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Ziwei and Zeng, Fanhu and Fang, Hongjian and Wang, Rui-Qi and Chen, Renxing and Zhu, Yanan and Chen, Yi and Yang, Peipei and Zhang, Xu-Yao},
  title     = {Fine-Grained Post-Training Quantization for Large Vision Language Models with Quantization-Aware Integrated Gradients},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3500--3510},
}
Dr. Seg: Revisiting GRPO Training for Visual Large Language Models through Perception-Oriented Design: Haoxiang Sun,

Tao Wang,

Chenwei Tang,

Li Yuan,

Jiancheng Lv; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Haoxiang and Wang, Tao and Tang, Chenwei and Yuan, Li and Lv, Jiancheng},
  title     = {{Dr. Seg}: Revisiting {GRPO} Training for Visual Large Language Models through Perception-Oriented Design},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24320--24329},
}
GR-Gauge: Cost-efficient Training Configuration By Gauging the Gradient Redundancy: Guanjie Wang,

Chen Chen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Guanjie and Chen, Chen},
  title     = {{GR-Gauge}: Cost-efficient Training Configuration By Gauging the Gradient Redundancy},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12934--12943},
}
DROID-SLAM in the Wild: Moyang Li,

Zihan Zhu,

Marc Pollefeys,

Daniel Barath; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Moyang and Zhu, Zihan and Pollefeys, Marc and Barath, Daniel},
  title     = {{DROID-SLAM} in the Wild},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36498--36508},
}
Towards Robust Multi-Modal Semantic Segmentation with Teacher-Student Framework and Hybrid Prototype Distillation: Jiaqi Tan,

Xu Zheng,

Yang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Jiaqi and Zheng, Xu and Liu, Yang},
  title     = {Towards Robust Multi-Modal Semantic Segmentation with Teacher-Student Framework and Hybrid Prototype Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27665--27675},
}
EchoFoley: Event-Centric Hierarchical Control for Video Grounded Creative Sound Generation: Bingxuan Li,

Yiming Cui,

Yicheng He,

Yiwei Wang,

Shu Zhang,

Longyin Wen,

Yulei Niu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Bingxuan and Cui, Yiming and He, Yicheng and Wang, Yiwei and Zhang, Shu and Wen, Longyin and Niu, Yulei},
  title     = {{EchoFoley}: Event-Centric Hierarchical Control for Video Grounded Creative Sound Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27229--27238},
}
Intrinsic Concept Extraction Based on Compositional Interpretability: Hanyu Shi,

Hong Tao,

Guoheng Huang,

Jianbin Jiang,

Xuhang Chen,

Chi-Man Pun,

Shanhu Wang,

Pan Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Hanyu and Tao, Hong and Huang, Guoheng and Jiang, Jianbin and Chen, Xuhang and Pun, Chi-Man and Wang, Shanhu and Pan, Pan},
  title     = {Intrinsic Concept Extraction Based on Compositional Interpretability},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38969--38978},
}
Kontinuous Kontext: Continuous Strength Control for Instruction-based Image Editing: Rishubh Parihar,

Or Patashnik,

Daniil Ostashev,

Venkatesh Babu Radhakrishnan,

Daniel Cohen-Or,

Kuan-Chieh Jackson Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parihar_2026_CVPR,
  author    = {Parihar, Rishubh and Patashnik, Or and Ostashev, Daniil and Radhakrishnan, Venkatesh Babu and Cohen-Or, Daniel and Wang, Kuan-Chieh Jackson},
  title     = {{Kontinuous Kontext}: Continuous Strength Control for Instruction-based Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37929--37939},
}
AffordGrasp: Cross-Modal Diffusion for Affordance-Aware Grasp Synthesis: Xiaofei Wu,

Yi Zhang,

Yumeng Liu,

Yuexin Ma,

Yujiao Shi,

Xuming He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xiaofei and Zhang, Yi and Liu, Yumeng and Ma, Yuexin and Shi, Yujiao and He, Xuming},
  title     = {{AffordGrasp}: Cross-Modal Diffusion for Affordance-Aware Grasp Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15944--15953},
}
RAYNOVA: Scale-Temporal Autoregressive World Modeling in Ray Space: Yichen Xie,

Chensheng Peng,

Mazen Abdelfattah,

Yihan Hu,

Jiezhi Yang,

Eric Higgins,

Ryan Brigden,

Masayoshi Tomizuka,

Wei Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yichen and Peng, Chensheng and Abdelfattah, Mazen and Hu, Yihan and Yang, Jiezhi and Higgins, Eric and Brigden, Ryan and Tomizuka, Masayoshi and Zhan, Wei},
  title     = {{RAYNOVA}: Scale-Temporal Autoregressive World Modeling in Ray Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25426--25437},
}
SPEGC: Continual Test-Time Adaptation via Semantic-Prompt-Enhanced Graph Clustering for Medical Image Segmentation: Xiaogang Du,

Jiawei Zhang,

Tongfei Liu,

Tao Lei,

Yingbo Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Xiaogang and Zhang, Jiawei and Liu, Tongfei and Lei, Tao and Wang, Yingbo},
  title     = {{SPEGC}: Continual Test-Time Adaptation via Semantic-Prompt-Enhanced Graph Clustering for Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8481--8491},
}
ExtrinSplat: Decoupling Geometry and Semantics for Open-Vocabulary Understanding in 3D Gaussian Splatting: Jiayu Ding,

Xinpeng Liu,

Zhiyi Pan,

Shiqiang Long,

Ge Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Jiayu and Liu, Xinpeng and Pan, Zhiyi and Long, Shiqiang and Li, Ge},
  title     = {{ExtrinSplat}: Decoupling Geometry and Semantics for Open-Vocabulary Understanding in {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31019--31028},
}
Vocabulary Scaling Law: Tuning Open-vocabulary Predictors for Their Openness: Ziliang Chen,

Yulu Li,

Liangda Fang,

Jusheng Zhang,

Yongsen Zheng,

Quanlong Guan,

Xipeng Chen; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Ziliang and Li, Yulu and Fang, Liangda and Zhang, Jusheng and Zheng, Yongsen and Guan, Quanlong and Chen, Xipeng},
  title     = {Vocabulary Scaling Law: Tuning Open-vocabulary Predictors for Their Openness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3091--3100},
}
The Consistency Critic: Correcting Inconsistencies in Generated Images via Reference-Guided Attentive Alignment: Ziheng Ouyang,

Yiren Song,

Yaoli Liu,

Shihao Zhu,

Qibin Hou,

Ming-Ming Cheng,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ouyang_2026_CVPR,
  author    = {Ouyang, Ziheng and Song, Yiren and Liu, Yaoli and Zhu, Shihao and Hou, Qibin and Cheng, Ming-Ming and Shou, Mike Zheng},
  title     = {The Consistency Critic: Correcting Inconsistencies in Generated Images via Reference-Guided Attentive Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2035--2046},
}
StyleDoctor: Towards Specialist Reward Model for Style-centric Generation Tasks: Xilin He,

Xiaole Xian,

Xiangyu Yue,

Muhammad Haris Khan; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Xilin and Xian, Xiaole and Yue, Xiangyu and Khan, Muhammad Haris},
  title     = {{StyleDoctor}: Towards Specialist Reward Model for Style-centric Generation Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29126--29135},
}
Where Does Vision Meet Language? Understanding and Refining Visual Fusion in MLLMs via Contrastive Attention: Shezheng Song,

Shasha Li,

Shan Zhao,

Xiaopeng Li,

Qian Wan,

Chengyu Wang,

Tianwei Yan,

Ma Jun,

Jie Yu; [pdf]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Shezheng and Li, Shasha and Zhao, Shan and Li, Xiaopeng and Wan, Qian and Wang, Chengyu and Yan, Tianwei and Jun, Ma and Yu, Jie},
  title     = {Where Does Vision Meet Language? Understanding and Refining Visual Fusion in {MLLMs} via Contrastive Attention},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10051--10060},
}
From Events to Clarity: The Event-Guided Diffusion Framework for Dehazing: Ling Wang,

Yunfan Lu,

Wenzong Ma,

Huizai Yao,

Pengteng Li,

Hui Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ling and Lu, Yunfan and Ma, Wenzong and Yao, Huizai and Li, Pengteng and Xiong, Hui},
  title     = {From Events to Clarity: The Event-Guided Diffusion Framework for Dehazing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34028--34039},
}
Taming Preference Mode Collapse via Directional Decoupling Alignment in Diffusion Reinforcement Learning: Chubin Chen,

Sujie Hu,

Jiashu Zhu,

Meiqi Wu,

Jintao Chen,

Yanxun Li,

Nisha Huang,

Chengyu Fang,

Jiahong Wu,

Xiangxiang Chu,

Xiu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Chubin and Hu, Sujie and Zhu, Jiashu and Wu, Meiqi and Chen, Jintao and Li, Yanxun and Huang, Nisha and Fang, Chengyu and Wu, Jiahong and Chu, Xiangxiang and Li, Xiu},
  title     = {Taming Preference Mode Collapse via Directional Decoupling Alignment in Diffusion Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12775--12786},
}
DocPrune: Efficient Document Question Answering via Background, Question, and Comprehension-aware Token Pruning: Joonmyung Choi,

Sanghyeok Lee,

Jongha Kim,

Sehyung Kim,

Dohwan Ko,

Jihyung Kil,

Hyunwoo J. Kim; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Joonmyung and Lee, Sanghyeok and Kim, Jongha and Kim, Sehyung and Ko, Dohwan and Kil, Jihyung and Kim, Hyunwoo J.},
  title     = {{DocPrune}: Efficient Document Question Answering via Background, Question, and Comprehension-aware Token Pruning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3543--3552},
}
Beyond Text Prompts: Precise Concept Erasure through Text-Image Collaboration: Jun Li,

Lizhi Xiong,

Ziqiang Li,

Weiwei Jiang,

Zhangjie Fu,

Yong Li,

Guo-Sen Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jun and Xiong, Lizhi and Li, Ziqiang and Jiang, Weiwei and Fu, Zhangjie and Li, Yong and Xie, Guo-Sen},
  title     = {Beyond Text Prompts: Precise Concept Erasure through Text-Image Collaboration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37653--37663},
}
POUR: A Provably Optimal Method for Unlearning Representation via Neural Collapse: Anjie Le,

Can Peng,

Yuyuan Liu,

J. Alison Noble; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Le_2026_CVPR,
  author    = {Le, Anjie and Peng, Can and Liu, Yuyuan and Noble, J. Alison},
  title     = {{POUR}: A Provably Optimal Method for Unlearning Representation via Neural Collapse},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10273--10282},
}
FlowMotion: Training-Free Flow Guidance for Video Motion Transfer: Zhen Wang,

Youcan Xu,

Jun Xiao,

Long Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhen and Xu, Youcan and Xiao, Jun and Chen, Long},
  title     = {{FlowMotion}: Training-Free Flow Guidance for Video Motion Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38143--38153},
}
Skyra: AI-Generated Video Detection via Grounded Artifact Reasoning: Yifei Li,

Wenzhao Zheng,

Yanran Zhang,

Runze Sun,

Yu Zheng,

Lei Chen,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yifei and Zheng, Wenzhao and Zhang, Yanran and Sun, Runze and Zheng, Yu and Chen, Lei and Zhou, Jie and Lu, Jiwen},
  title     = {{Skyra}: {AI}-Generated Video Detection via Grounded Artifact Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4482--4493},
}
Visual Diffusion Models are Geometric Solvers: Nir Goren,

Shai Yehezkel,

Omer Dahary,

Andrey Voynov,

Or Patashnik,

Daniel Cohen-Or; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Goren_2026_CVPR,
  author    = {Goren, Nir and Yehezkel, Shai and Dahary, Omer and Voynov, Andrey and Patashnik, Or and Cohen-Or, Daniel},
  title     = {Visual Diffusion Models are Geometric Solvers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43187--43196},
}
Image Diffusion Preview with Consistency Solver: Fu-Yun Wang,

Hao Zhou,

Liangzhe Yuan,

Sanghyun Woo,

Boqing Gong,

Bohyung Han,

Ming-Hsuan Yang,

Han Zhang,

Yukun Zhu,

Ting Liu,

Long Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Fu-Yun and Zhou, Hao and Yuan, Liangzhe and Woo, Sanghyun and Gong, Boqing and Han, Bohyung and Yang, Ming-Hsuan and Zhang, Han and Zhu, Yukun and Liu, Ting and Zhao, Long},
  title     = {Image Diffusion Preview with Consistency Solver},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43271--43280},
}
LATTICE: Democratize High-Fidelity 3D Generation at Scale: Zeqiang Lai,

Yunfei Zhao,

Zibo Zhao,

Haolin Liu,

Qingxiang Lin,

Jingwei Huang,

Chunchao Guo,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Zeqiang and Zhao, Yunfei and Zhao, Zibo and Liu, Haolin and Lin, Qingxiang and Huang, Jingwei and Guo, Chunchao and Yue, Xiangyu},
  title     = {{LATTICE}: Democratize High-Fidelity {3D} Generation at Scale},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19982--19992},
}
A Frame is Worth One Token: Efficient Generative World Modeling with Delta Tokens: Tommie Kerssies,

Gabriele Berton,

Ju He,

Qihang Yu,

Wufei Ma,

Daan de Geus,

Gijs Dubbelman,

Liang-Chieh Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kerssies_2026_CVPR,
  author    = {Kerssies, Tommie and Berton, Gabriele and He, Ju and Yu, Qihang and Ma, Wufei and de Geus, Daan and Dubbelman, Gijs and Chen, Liang-Chieh},
  title     = {A Frame is Worth One Token: Efficient Generative World Modeling with Delta Tokens},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27978--27988},
}
Resolving the Identity Crisis in Text-to-Image Generation: Shubhankar Borse,

Farzad Farhadzadeh,

Munawar Hayat,

Fatih Porikli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Borse_2026_CVPR,
  author    = {Borse, Shubhankar and Farhadzadeh, Farzad and Hayat, Munawar and Porikli, Fatih},
  title     = {Resolving the Identity Crisis in Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36703--36712},
}
Harmony: Harmonizing Audio and Video Generation through Cross-Task Synergy: Teng Hu,

Zhentao Yu,

Guozhen Zhang,

Zihan Su,

Zhengguang Zhou,

Youliang Zhang,

Yuan Zhou,

Qinglin Lu,

Ran Yi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Teng and Yu, Zhentao and Zhang, Guozhen and Su, Zihan and Zhou, Zhengguang and Zhang, Youliang and Zhou, Yuan and Lu, Qinglin and Yi, Ran},
  title     = {{Harmony}: Harmonizing Audio and Video Generation through Cross-Task Synergy},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16085--16095},
}
MAMMA: Markerless Accurate Multi-person Motion Acquisition: Hanz Cuevas Velasquez,

Anastasios Yiannakidis,

Soyong Shin,

Giorgio Becherini,

Markus Höschle,

Joachim Tesch,

Taylor Obersat,

Tsvetelina Alexiadis,

Eni Halilaj,

Michael J. Black; [pdf] [supp]
[bibtex]
@InProceedings{Velasquez_2026_CVPR,
  author    = {Velasquez, Hanz Cuevas and Yiannakidis, Anastasios and Shin, Soyong and Becherini, Giorgio and H{\"o}schle, Markus and Tesch, Joachim and Obersat, Taylor and Alexiadis, Tsvetelina and Halilaj, Eni and Black, Michael J.},
  title     = {{MAMMA}: Markerless Accurate Multi-person Motion Acquisition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7175--7186},
}
Dual Graph Regularized Deep Unfolding Network for Guided Depth Map Super-resolution: Zhiwei Zhong,

Peilin Chen,

Qiangqiang Shen,

Bo Li,

Shiqi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Zhiwei and Chen, Peilin and Shen, Qiangqiang and Li, Bo and Wang, Shiqi},
  title     = {Dual Graph Regularized Deep Unfolding Network for Guided Depth Map Super-resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16322--16332},
}
Towards Robust Vision Transformers: Path Dependency Analysis and a Simple Two-Stage Adversarial Training: Seongmin Kim,

Byung Cheol Song; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Seongmin and Song, Byung Cheol},
  title     = {Towards Robust Vision Transformers: Path Dependency Analysis and a Simple Two-Stage Adversarial Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15669--15678},
}
First Frame Is the Place to Go for Video Content Customization: Jingxi Chen,

Zongxia Li,

Zhichao Liu,

Guangyao Shi,

Xiyang Wu,

Fuxiao Liu,

Cornelia Fermüller,

Brandon Y. Feng,

Yiannis Aloimonos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jingxi and Li, Zongxia and Liu, Zhichao and Shi, Guangyao and Wu, Xiyang and Liu, Fuxiao and Ferm{\"u}ller, Cornelia and Feng, Brandon Y. and Aloimonos, Yiannis},
  title     = {First Frame Is the Place to Go for Video Content Customization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9243--9252},
}
High-Quality and Efficient Turbulence Mitigation with Events: Xiaoran Zhang,

Jian Ding,

Yuxing Duan,

Haoyue Liu,

Gang Chen,

Yi Chang,

Luxin Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xiaoran and Ding, Jian and Duan, Yuxing and Liu, Haoyue and Chen, Gang and Chang, Yi and Yan, Luxin},
  title     = {High-Quality and Efficient Turbulence Mitigation with Events},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29514--29525},
}
HeroGS: Hierarchical Guidance for Robust 3D Gaussian Splatting under Sparse Views: Jiashu Li,

Xumeng Han,

Zhaoyang Wei,

Zipeng Wang,

Kuiran Wang,

Guorong Li,

Zhenjun Han,

Jianbin Jiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiashu and Han, Xumeng and Wei, Zhaoyang and Wang, Zipeng and Wang, Kuiran and Li, Guorong and Han, Zhenjun and Jiao, Jianbin},
  title     = {{HeroGS}: Hierarchical Guidance for Robust {3D} {Gaussian} Splatting under Sparse Views},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11788--11797},
}
SEA-Vision: A Multilingual Benchmark for Comprehensive Document and Scene Text Understanding in Southeast Asia: Pengfei Yue,

Xingran Zhao,

Juntao Chen,

Peng Hou,

Wang Longchao,

Jianghang Lin,

Shengchuan Zhang,

Anxiang Zeng,

Liujuan Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yue_2026_CVPR,
  author    = {Yue, Pengfei and Zhao, Xingran and Chen, Juntao and Hou, Peng and Longchao, Wang and Lin, Jianghang and Zhang, Shengchuan and Zeng, Anxiang and Cao, Liujuan},
  title     = {{SEA-Vision}: A Multilingual Benchmark for Comprehensive Document and Scene Text Understanding in {Southeast Asia}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30895--30905},
}
VITAL: Vision-Encoder-centered Pre-training for LMMs in Visual Quality Assessment: Ziheng Jia,

Linhan Cao,

Jinliang Han,

Zicheng Zhang,

Jiaying Qian,

Jiarui Wang,

Zijian Chen,

Guangtao Zhai,

Xiongkuo Min; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Ziheng and Cao, Linhan and Han, Jinliang and Zhang, Zicheng and Qian, Jiaying and Wang, Jiarui and Chen, Zijian and Zhai, Guangtao and Min, Xiongkuo},
  title     = {{VITAL}: Vision-Encoder-centered Pre-training for {LMMs} in Visual Quality Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41245--41255}
}
Jailbreaking Vision-Language Models via Dissonance-Guided Suffix Optimization and Image-Phrase Injection: Jiacheng Pi,

Zhiguo Yang,

Xingxing Huang,

Dongsheng Xu,

Ruizhi Zhong,

Wenjie Ruan; [pdf] [supp]
[bibtex]
@InProceedings{Pi_2026_CVPR,
  author    = {Pi, Jiacheng and Yang, Zhiguo and Huang, Xingxing and Xu, Dongsheng and Zhong, Ruizhi and Ruan, Wenjie},
  title     = {Jailbreaking Vision-Language Models via Dissonance-Guided Suffix Optimization and Image-Phrase Injection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30087--30097}
}
PDD: Manifold-Prior Diverse Distillation for Medical Anomaly Detection: Xijun Lu,

Hongying Liu,

Fanhua Shang,

Yanming Hui,

Liang Wan; [pdf] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Xijun and Liu, Hongying and Shang, Fanhua and Hui, Yanming and Wan, Liang},
  title     = {{PDD}: Manifold-Prior Diverse Distillation for Medical Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28534--28544}
}
Bidirectional Query-Driven Generation of Parametric CAD Sketch: Yang Liu,

Daxuan Ren,

Yijie Ding,

Jianmin Zheng,

Fang Deng; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yang and Ren, Daxuan and Ding, Yijie and Zheng, Jianmin and Deng, Fang},
  title     = {Bidirectional Query-Driven Generation of Parametric {CAD} Sketch},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3176--3185}
}
Illustrator's Depth: Monocular Layer Index Prediction for Image Decomposition: Nissim Maruani,

Peiying Zhang,

Siddhartha Chaudhuri,

Matthew Fisher,

Nanxuan Zhao,

Vladimir G. Kim,

Pierre Alliez,

Mathieu Desbrun,

Wang Yifan; [pdf] [supp]
[bibtex]
@InProceedings{Maruani_2026_CVPR,
  author    = {Maruani, Nissim and Zhang, Peiying and Chaudhuri, Siddhartha and Fisher, Matthew and Zhao, Nanxuan and Kim, Vladimir G. and Alliez, Pierre and Desbrun, Mathieu and Yifan, Wang},
  title     = {Illustrator's Depth: Monocular Layer Index Prediction for Image Decomposition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26824--26834}
}
On the Role of Temporal Granularity in the Robustness of Spiking Neural Networks: Mengting Xu,

Shi Gu,

Peng Lin,

De Ma,

Huajin Tang,

Qian Zheng,

Gang Pan; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Mengting and Gu, Shi and Lin, Peng and Ma, De and Tang, Huajin and Zheng, Qian and Pan, Gang},
  title     = {On the Role of Temporal Granularity in the Robustness of Spiking Neural Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27946--27955}
}
SRGCD: Stability-Driven Region Growth Framework for 3D Change Detection: Yue Wu,

Tao Peng,

Yongzhe Yuan,

Kaiyuan Feng,

Hao Li,

Maoguo Gong,

Qiguang Miao,

Wenping Ma; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yue and Peng, Tao and Yuan, Yongzhe and Feng, Kaiyuan and Li, Hao and Gong, Maoguo and Miao, Qiguang and Ma, Wenping},
  title     = {{SRGCD}: Stability-Driven Region Growth Framework for {3D} Change Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7546--7555}
}
MoCoDiff: A Controllable Autoregressive Diffusion Model for Expressive Motion Generation: Wenfeng Song,

Xuehan Wang,

Shuai Li,

Yi Chen,

Yuting Guo,

Zhenyu Wu,

Xingliang Jin,

Chenglizhao Chen,

Fei Hou,

Hongyu Wu,

Aimin Hao; [pdf] [supp]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Wenfeng and Wang, Xuehan and Li, Shuai and Chen, Yi and Guo, Yuting and Wu, Zhenyu and Jin, Xingliang and Chen, Chenglizhao and Hou, Fei and Wu, Hongyu and Hao, Aimin},
  title     = {{MoCoDiff}: A Controllable Autoregressive Diffusion Model for Expressive Motion Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23528--23537}
}
SHARP: Short-Window Streaming for Accurate and Robust Prediction in Motion Forecasting: Alexander Prutsch,

Christian Fruhwirth-Reisinger,

David Schinagl,

Horst Possegger; [pdf] [supp]
[bibtex]
@InProceedings{Prutsch_2026_CVPR,
  author    = {Prutsch, Alexander and Fruhwirth-Reisinger, Christian and Schinagl, David and Possegger, Horst},
  title     = {{SHARP}: Short-Window Streaming for Accurate and Robust Prediction in Motion Forecasting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32103--32112}
}
Dual-Estimator: Decoupling Global and Local Semantic Shift for Drift Compensation in Class-Incremental Learning: Fankang Xu,

Lu Jin,

Yanpeng Sun,

Shiyu Xuan,

Zechao Li; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Fankang and Jin, Lu and Sun, Yanpeng and Xuan, Shiyu and Li, Zechao},
  title     = {Dual-Estimator: Decoupling Global and Local Semantic Shift for Drift Compensation in Class-Incremental Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10799--10809}
}
SAM 3D Body: Robust Full-Body Human Mesh Recovery: Xitong Yang,

Devansh Kukreja,

Don Pinkus,

Taosha Fan,

Jinhyung Park,

Soyong Shin,

Jinkun Cao,

Jia-Wei Liu,

Nicolás Ugrinovic,

Anushka Sagar,

Jitendra Malik,

Matt Feiszli,

Piotr Dollár,

Kris Kitani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xitong and Kukreja, Devansh and Pinkus, Don and Fan, Taosha and Park, Jinhyung and Shin, Soyong and Cao, Jinkun and Liu, Jia-Wei and Ugrinovic, Nicol{\'a}s and Sagar, Anushka and Malik, Jitendra and Feiszli, Matt and Doll{\'a}r, Piotr and Kitani, Kris},
  title     = {{SAM 3D Body}: Robust Full-Body Human Mesh Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7209--7219}
}
Boosting Quantitive and Spatial Awareness for Zero-Shot Object Counting: Da Zhang,

Bingyu Li,

Feiyu Wang,

Zhiyuan Zhao,

Junyu Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Da and Li, Bingyu and Wang, Feiyu and Zhao, Zhiyuan and Gao, Junyu},
  title     = {Boosting Quantitive and Spatial Awareness for Zero-Shot Object Counting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20356--20366}
}
OpenVoxel: Training-Free Grouping and Captioning Voxels for Open-Vocabulary 3D Scene Understanding: Sheng-Yu Huang,

Jaesung Choe,

Yu-Chiang Frank Wang,

Cheng Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Sheng-Yu and Choe, Jaesung and Wang, Yu-Chiang Frank and Sun, Cheng},
  title     = {{OpenVoxel}: Training-Free Grouping and Captioning Voxels for Open-Vocabulary {3D} Scene Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16734--16745}
}
GuardTrace-VL: Detecting Unsafe Multimodel Reasoning via Iterative Safety Supervision: Yuxiao Xiang,

Junchi Chen,

Zhenchao Jin,

Changtao Miao,

Haojie Yuan,

Qi Chu,

Tao Gong,

Nenghai Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Yuxiao and Chen, Junchi and Jin, Zhenchao and Miao, Changtao and Yuan, Haojie and Chu, Qi and Gong, Tao and Yu, Nenghai},
  title     = {{GuardTrace-VL}: Detecting Unsafe Multimodel Reasoning via Iterative Safety Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11912--11922}
}
SPAN: Spatial-Projection Alignment for Monocular 3D Object Detection: Yifan Wang,

Yian Zhao,

Fanqi Pu,

Xiaochen Yang,

Yang Tang,

Xi Chen,

Wenming Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yifan and Zhao, Yian and Pu, Fanqi and Yang, Xiaochen and Tang, Yang and Chen, Xi and Yang, Wenming},
  title     = {{SPAN}: Spatial-Projection Alignment for Monocular {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40762--40771}
}
RAGTrack: Language-aware RGBT Tracking with Retrieval-Augmented Generation: Hao Li,

Yuhao Wang,

Wenning Hao,

Pingping Zhang,

Dong Wang,

Huchuan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Hao and Wang, Yuhao and Hao, Wenning and Zhang, Pingping and Wang, Dong and Lu, Huchuan},
  title     = {{RAGTrack}: Language-aware {RGBT} Tracking with Retrieval-Augmented Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28179--28189}
}
Is the Modality Gap a Bug or a Feature? A Robustness Perspective: Rhea Chowers,

Oshri Naparstek,

Udi Barzelay,

Yair Weiss; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chowers_2026_CVPR,
  author    = {Chowers, Rhea and Naparstek, Oshri and Barzelay, Udi and Weiss, Yair},
  title     = {Is the Modality Gap a Bug or a Feature? A Robustness Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30288--30298}
}
Federated Active Learning Under Extreme Non-IID and Global Class Imbalance: Chen-Chen Zong,

Sheng-Jun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zong_2026_CVPR,
  author    = {Zong, Chen-Chen and Huang, Sheng-Jun},
  title     = {Federated Active Learning Under Extreme Non-{IID} and Global Class Imbalance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24534--24544}
}
Tackling Alignment Ambiguity in Person Retrieval through Conversational Attribute Mining: Hao Zou,

Runqing Zhang,

Jin Ding,

Xue Zhou,

Jianxiao Zou,

Mingzhu Cai; [pdf] [supp]
[bibtex]
@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Hao and Zhang, Runqing and Ding, Jin and Zhou, Xue and Zou, Jianxiao and Cai, Mingzhu},
  title     = {Tackling Alignment Ambiguity in Person Retrieval through Conversational Attribute Mining},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9689--9698}
}
Transform to Transfer: Boosting Adversarial Attack Transferability on Vision-Language Pre-training Models: Yang Li,

Jia-Li Yin,

Luojun Lin,

Wei Lin; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yang and Yin, Jia-Li and Lin, Luojun and Lin, Wei},
  title     = {Transform to Transfer: Boosting Adversarial Attack Transferability on Vision-Language Pre-training Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30141--30150}
}
Streaming Diffusion Model for Fast Infrared and Visible Video Fusion: Jinyuan Liu,

Ludan Sun,

Tengyu Ma,

Chunyan Yang,

Zhiying Jiang,

Long Ma,

Risheng Liu,

Xin Fan; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jinyuan and Sun, Ludan and Ma, Tengyu and Yang, Chunyan and Jiang, Zhiying and Ma, Long and Liu, Risheng and Fan, Xin},
  title     = {Streaming Diffusion Model for Fast Infrared and Visible Video Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14305--14314}
}
Explore with Long-term Memory: A Benchmark and Multimodal LLM-based Reinforcement Learning Framework for Embodied Exploration: Sen Wang,

Bangwei Liu,

Zhenkun Gao,

Lizhuang Ma,

Xuhong Wang,

Yuan Xie,

Xin Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Sen and Liu, Bangwei and Gao, Zhenkun and Ma, Lizhuang and Wang, Xuhong and Xie, Yuan and Tan, Xin},
  title     = {Explore with Long-term Memory: A Benchmark and Multimodal {LLM}-based Reinforcement Learning Framework for Embodied Exploration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37098--37108}
}
Multi-Modal Image Fusion via Intervention-Stable Feature Learning: Xue Wang,

Zheng Guan,

Wenhua Qian,

Chengchao Wang,

Runzhuo Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xue and Guan, Zheng and Qian, Wenhua and Wang, Chengchao and Ma, Runzhuo},
  title     = {Multi-Modal Image Fusion via Intervention-Stable Feature Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33827--33837}
}
Dark3R: Learning Structure from Motion in the Dark: Andrew Y. Guo,

Anagh Malik,

SaiKiran Tedla,

Yutong Dai,

Yiqian Qin,

Zach Salehe,

Benjamin Attal,

Sotiris Nousias,

Kiriakos N. Kutulakos,

David B. Lindell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Andrew Y. and Malik, Anagh and Tedla, SaiKiran and Dai, Yutong and Qin, Yiqian and Salehe, Zach and Attal, Benjamin and Nousias, Sotiris and Kutulakos, Kiriakos N. and Lindell, David B.},
  title     = {{Dark3R}: Learning Structure from Motion in the Dark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34160--34170}
}
Gastric-X: A Multimodal Multi-Phase Benchmark Dataset for Advancing Vision-Language Models in Gastric Cancer Analysis: Yuanzhe Li,

Hao Chen,

Rui Yin,

Juyan Ba,

Yu Zhang,

Sheng Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yuanzhe and Chen, Hao and Yin, Rui and Ba, Juyan and Zhang, Yu and Lu, Sheng},
  title     = {{Gastric-X}: A Multimodal Multi-Phase Benchmark Dataset for Advancing Vision-Language Models in Gastric Cancer Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2490--2501}
}
AVION: Aerial Vision-Language Instruction from Offline Teacher to Prompt-Tuned Network: Yu Hu,

Jianyang Gu,

Hao Liu,

Yue Cao,

Jozsef Hamari,

Zheng Liu,

Mohsen Zardadi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Yu and Gu, Jianyang and Liu, Hao and Cao, Yue and Hamari, Jozsef and Liu, Zheng and Zardadi, Mohsen},
  title     = {{AVION}: Aerial Vision-Language Instruction from Offline Teacher to Prompt-Tuned Network},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10106--10115}
}
Efficient and Training-Free Single-Image Diffusion Models: Haojun Qiu,

Kiriakos N. Kutulakos,

David B. Lindell; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Haojun and Kutulakos, Kiriakos N. and Lindell, David B.},
  title     = {Efficient and Training-Free Single-Image Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36157--36167}
}
OSA: Echocardiography Video Segmentation via Orthogonalized State Update and Anatomical Prior-aware Feature Enhancement: Rui Wang,

Huisi Wu,

Jing Qin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Rui and Wu, Huisi and Qin, Jing},
  title     = {{OSA}: Echocardiography Video Segmentation via Orthogonalized State Update and Anatomical Prior-aware Feature Enhancement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1428--1438}
}
Detecting Compressed AI-Generated Images via Phase Spectrum Robustness: Kai Li,

Wenqi Ren,

Wei Wang,

Xiaochun Cao; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Kai and Ren, Wenqi and Wang, Wei and Cao, Xiaochun},
  title     = {Detecting Compressed {AI}-Generated Images via Phase Spectrum Robustness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35427--35436}
}
VLM-Guided Group Preference Alignment for Diffusion-based Human Mesh Recovery: Wenhao Shen,

Hao Wang,

Wanqi Yin,

Fayao Liu,

Xulei Yang,

Chao Liang,

Zhongang Cai,

Guosheng Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Wenhao and Wang, Hao and Yin, Wanqi and Liu, Fayao and Yang, Xulei and Liang, Chao and Cai, Zhongang and Lin, Guosheng},
  title     = {{VLM}-Guided Group Preference Alignment for Diffusion-based Human Mesh Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13918--13929}
}
Tunable Soft Equivariance with Guarantees: Md Ashiqur Rahman,

Lim Jun Hao,

Jeremiah Jiang,

Teck-Yian Lim,

Raymond A. Yeh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rahman_2026_CVPR,
  author    = {Rahman, Md Ashiqur and Hao, Lim Jun and Jiang, Jeremiah and Lim, Teck-Yian and Yeh, Raymond A.},
  title     = {Tunable Soft Equivariance with Guarantees},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17693--17703}
}
PhysIR-Splat: Physically Consistent Thermal Infrared Radiative Transfer in 3D Gaussian Splatting: Jingyuan Gao,

Yumeng Hu,

Fei Gao,

Mingjin Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Jingyuan and Hu, Yumeng and Gao, Fei and Zhang, Mingjin},
  title     = {{PhysIR-Splat}: Physically Consistent Thermal Infrared Radiative Transfer in {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11818--11828}
}
Generalized-CVO: Fast and Correspondence-Free Local Point Cloud Registration with Second Order Riemannian Optimization: Ray Zhang,

Marcus Greiff,

Thomas Lew,

John Subosits; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Ray and Greiff, Marcus and Lew, Thomas and Subosits, John},
  title     = {Generalized-{CVO}: Fast and Correspondence-Free Local Point Cloud Registration with Second Order {Riemannian} Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2948--2958}
}
MultiAnimate: Pose-Guided Image Animation Made Extensible: Yingcheng Hu,

Haowen Gong,

Chuanguang Yang,

Zhulin An,

Yongjun Xu,

Songhua Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Yingcheng and Gong, Haowen and Yang, Chuanguang and An, Zhulin and Xu, Yongjun and Liu, Songhua},
  title     = {{MultiAnimate}: Pose-Guided Image Animation Made Extensible},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9306--9316}
}
Exposing and Evaluating Hallucinations for GUI Grounding: Zicheng Zhang,

Hongyi Jing,

Rui Lv,

Shuo Fang,

Shiai Zhu,

Junying Wang,

Chunyi Li,

Xiaohong Liu,

Chenguang Ma,

Guangtao Zhai; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zicheng and Jing, Hongyi and Lv, Rui and Fang, Shuo and Zhu, Shiai and Wang, Junying and Li, Chunyi and Liu, Xiaohong and Ma, Chenguang and Zhai, Guangtao},
  title     = {Exposing and Evaluating Hallucinations for {GUI} Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40208--40223}
}
Beyond Sequential Tools: A Unified VLM Agent System for Photographic Post-Processing via Dynamic Multi-Expert Fusion: Honglin Xiong,

Chenjie Zhu,

Jianbiao Ding,

Zixuan Ni,

Wei Li,

Zhenpeng Mi,

Qian Wang; [pdf]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Honglin and Zhu, Chenjie and Ding, Jianbiao and Ni, Zixuan and Li, Wei and Mi, Zhenpeng and Wang, Qian},
  title     = {Beyond Sequential Tools: A Unified {VLM} Agent System for Photographic Post-Processing via Dynamic Multi-Expert Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41521--41530}
}
ReAttnCLIP: Training-Free Open-Vocabulary Remote Sensing Image Segmentation via Re-defined Attention in CLIP: Xin Niu,

Manqi Zhao,

Dongsheng Jiang,

Yingying Wu,

Bing Su; [pdf] [supp]
[bibtex]
@InProceedings{Niu_2026_CVPR,
  author    = {Niu, Xin and Zhao, Manqi and Jiang, Dongsheng and Wu, Yingying and Su, Bing},
  title     = {{ReAttnCLIP}: Training-Free Open-Vocabulary Remote Sensing Image Segmentation via Re-defined Attention in {CLIP}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24980--24989}
}
LIBERO-Plus: A Progressive Robustness Benchmark for Visual-Language-Action Models: Senyu Fei,

Siyin Wang,

Junhao Shi,

Zihao Dai,

Jikun Cai,

Pengfang Qian,

Li Ji,

Xinzhe He,

Shiduo Zhang,

Zhaoye Fei,

Jinlan Fu,

Jingjing Gong,

Xipeng Qiu; [pdf] [supp]
[bibtex]
@InProceedings{Fei_2026_CVPR,
  author    = {Fei, Senyu and Wang, Siyin and Shi, Junhao and Dai, Zihao and Cai, Jikun and Qian, Pengfang and Ji, Li and He, Xinzhe and Zhang, Shiduo and Fei, Zhaoye and Fu, Jinlan and Gong, Jingjing and Qiu, Xipeng},
  title     = {{LIBERO-Plus}: A Progressive Robustness Benchmark for Visual-Language-Action Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38574--38583}
}
MajutsuCity: Language-driven Aesthetic-adaptive City Generation with Controllable 3D Assets and Layouts: Zilong Huang,

Jun He,

Xiaobin Huang,

Ziyi Xiong,

Yang Luo,

Junyan Ye,

Weijia Li,

Yiping Chen,

Ting Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zilong and He, Jun and Huang, Xiaobin and Xiong, Ziyi and Luo, Yang and Ye, Junyan and Li, Weijia and Chen, Yiping and Han, Ting},
  title     = {{MajutsuCity}: Language-driven Aesthetic-adaptive City Generation with Controllable {3D} Assets and Layouts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31608--31618}
}
AnyLift: Scaling Motion Reconstruction from Internet Videos via 2D Diffusion: Hongjie Li,

Heng Yu,

Jiaman Li,

Hong-Xing Yu,

Ehsan Adeli,

C. Karen Liu,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Hongjie and Yu, Heng and Li, Jiaman and Yu, Hong-Xing and Adeli, Ehsan and Liu, C. Karen and Wu, Jiajun},
  title     = {{AnyLift}: Scaling Motion Reconstruction from Internet Videos via {2D} Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13876--13886}
}
Accelerating Diffusion via Hybrid Data-Pipeline Parallelism Based on Conditional Guidance Scheduling: Euisoo Jung,

Byunghyun Kim,

Hyunjin Kim,

Seonghye Cho,

Jae-Gil Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Euisoo and Kim, Byunghyun and Kim, Hyunjin and Cho, Seonghye and Lee, Jae-Gil},
  title     = {Accelerating Diffusion via Hybrid Data-Pipeline Parallelism Based on Conditional Guidance Scheduling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9374--9383}
}
OpenDance: Multimodal Controllable 3D Dance Generation with Large-scale Internet Data: Jinlu Zhang,

Zixi Kang,

Libin Liu,

Jianlong Chang,

Qi Tian,

Feng Gao,

Yizhou Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jinlu and Kang, Zixi and Liu, Libin and Chang, Jianlong and Tian, Qi and Gao, Feng and Wang, Yizhou},
  title     = {{OpenDance}: Multimodal Controllable {3D} Dance Generation with Large-scale Internet Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28860--28870}
}
UniVerse: Empower Unified Generation with Reasoning and Knowledge: Kaiyue Sun,

Weiyang Jin,

Chengqi Duan,

Rongyao Fang,

Xian Liu,

Yuwei Niu,

Chunwei Wang,

Aoxue Li,

Xihui Liu; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Kaiyue and Jin, Weiyang and Duan, Chengqi and Fang, Rongyao and Liu, Xian and Niu, Yuwei and Wang, Chunwei and Li, Aoxue and Liu, Xihui},
  title     = {{UniVerse}: Empower Unified Generation with Reasoning and Knowledge},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21997--22006}
}
EcoAlign: An Economically Rational Framework for Efficient LVLM Alignment: Ruoxi Cheng,

Hao-Xuan Ma,

Teng Ma,

Hongyi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Ruoxi and Ma, Hao-Xuan and Ma, Teng and Zhang, Hongyi},
  title     = {{EcoAlign}: An Economically Rational Framework for Efficient {LVLM} Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17451--17461}
}
Inside-Out: Measuring Generalization in Vision Transformers Through Inner Workings: Yunxiang Peng,

Mengmeng Ma,

Ziyu Yao,

Xi Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Yunxiang and Ma, Mengmeng and Yao, Ziyu and Peng, Xi},
  title     = {Inside-Out: Measuring Generalization in Vision Transformers Through Inner Workings},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38936--38946}
}
AniMimic: Imitating 3D Animation from Video Priors: Tianyi Xie,

Yunuo Chen,

Yaowei Guo,

Yin Yang,

Bolei Zhou,

Demetri Terzopoulos,

Ying Jiang,

Chenfanfu Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Tianyi and Chen, Yunuo and Guo, Yaowei and Yang, Yin and Zhou, Bolei and Terzopoulos, Demetri and Jiang, Ying and Jiang, Chenfanfu},
  title     = {{AniMimic}: Imitating {3D} Animation from Video Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40266--40276}
}
Building Robust Vision Encoders for Cross-Dataset Evaluation in Immunofluorescent Microscopy: Umar Marikkar,

Syed Sameed Husain,

Muhammad Awais,

Sara Atito; [pdf] [supp]
[bibtex]
@InProceedings{Marikkar_2026_CVPR,
  author    = {Marikkar, Umar and Husain, Syed Sameed and Awais, Muhammad and Atito, Sara},
  title     = {Building Robust Vision Encoders for Cross-Dataset Evaluation in Immunofluorescent Microscopy},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28308--28317}
}
Gated KalmaNet: A Fading Memory Layer through Test-time Ridge Regression: Liangzu Peng,

Aditya Chattopadhyay,

Luca Zancato,

Elvis Nunez,

Wei Xia,

Stefano Soatto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Liangzu and Chattopadhyay, Aditya and Zancato, Luca and Nunez, Elvis and Xia, Wei and Soatto, Stefano},
  title     = {Gated {KalmaNet}: A Fading Memory Layer through Test-time Ridge Regression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20212--20222}
}
The Missing Point in Vision Transformers for Universal Image Segmentation: Sajjad Shahabodini,

Mobina Mansoori,

Farnoush Bayatmakou,

Jamshid Abouei,

Konstantinos Plataniotis,

Arash Mohammadi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shahabodini_2026_CVPR,
  author    = {Shahabodini, Sajjad and Mansoori, Mobina and Bayatmakou, Farnoush and Abouei, Jamshid and Plataniotis, Konstantinos and Mohammadi, Arash},
  title     = {The Missing Point in Vision Transformers for Universal Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6314--6324}
}
AeroAgent: A Vision-Physics-Decision Framework for Aerodynamic Vehicle Design: Ye Liu,

Shouyi Liu,

Huiyu Yang,

Jianghang Gu,

Wenhao Fan,

Zhongxin Yang,

Ding Wang,

Simeng Chen,

Zirun Jiang,

Yuanwei Bin,

Shiyi Chen,

Yuntian Chen; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Ye and Liu, Shouyi and Yang, Huiyu and Gu, Jianghang and Fan, Wenhao and Yang, Zhongxin and Wang, Ding and Chen, Simeng and Jiang, Zirun and Bin, Yuanwei and Chen, Shiyi and Chen, Yuntian},
  title     = {{AeroAgent}: A Vision-Physics-Decision Framework for Aerodynamic Vehicle Design},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11694--11703}
}
Reviving ConvNeXt for Efficient Convolutional Diffusion Models: Taesung Kwon,

Lorenzo Bianchi,

Lennart Wittke,

Felix Watine,

Fabio Carrara,

Jong Chul Ye,

Romann Weber,

Vinicius Azevedo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kwon_2026_CVPR,
  author    = {Kwon, Taesung and Bianchi, Lorenzo and Wittke, Lennart and Watine, Felix and Carrara, Fabio and Ye, Jong Chul and Weber, Romann and Azevedo, Vinicius},
  title     = {Reviving {ConvNeXt} for Efficient Convolutional Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43675--43685}
}
StreamAvatar: Streaming Diffusion Models for Real-Time Interactive Human Avatars: Zhiyao Sun,

Ziqiao Peng,

Yifeng Ma,

Yi Chen,

Zhengguang Zhou,

Zixiang Zhou,

Guozhen Zhang,

Youliang Zhang,

Yuan Zhou,

Qinglin Lu,

Yong-Jin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Zhiyao and Peng, Ziqiao and Ma, Yifeng and Chen, Yi and Zhou, Zhengguang and Zhou, Zixiang and Zhang, Guozhen and Zhang, Youliang and Zhou, Yuan and Lu, Qinglin and Liu, Yong-Jin},
  title     = {{StreamAvatar}: Streaming Diffusion Models for Real-Time Interactive Human Avatars},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10887--10897}
}
DetAny4D: Detect Anything 4D Temporally in a Streaming RGB Video: Jiawei Hou,

Shenghao Zhang,

Can Wang,

Zheng Gu,

Yonggen Ling,

Taiping Zeng,

Xiangyang Xue,

Jingbo Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hou_2026_CVPR,
  author    = {Hou, Jiawei and Zhang, Shenghao and Wang, Can and Gu, Zheng and Ling, Yonggen and Zeng, Taiping and Xue, Xiangyang and Zhang, Jingbo},
  title     = {{DetAny4D}: Detect Anything {4D} Temporally in a Streaming {RGB} Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32798--32807}
}
Rethinking Token Reduction for Large Vision-Language Models: Yi Wang,

Haofei Zhang,

Qihan Huang,

Anda Cao,

Gongfan Fang,

Wei Wang,

Xuan Jin,

Jie Song,

Mingli Song,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yi and Zhang, Haofei and Huang, Qihan and Cao, Anda and Fang, Gongfan and Wang, Wei and Jin, Xuan and Song, Jie and Song, Mingli and Wang, Xinchao},
  title     = {Rethinking Token Reduction for Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24727--24737},
}
CaptionQA: Is Your Caption as Useful as the Image Itself?: Shijia Yang,

Yunong Liu,

Bohan Zhai,

Ximeng Sun,

Zicheng Liu,

Emad Barsoum,

Manling Li,

Chenfeng Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Shijia and Liu, Yunong and Zhai, Bohan and Sun, Ximeng and Liu, Zicheng and Barsoum, Emad and Li, Manling and Xu, Chenfeng},
  title     = {{CaptionQA}: Is Your Caption as Useful as the Image Itself?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23741--23750},
}
Dynamics-Aware Preference Optimization for Vision-Language Models: Jusheng Zhang,

Kaitong Cai,

Jing Yang,

Jian Wang,

Keze Wang; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jusheng and Cai, Kaitong and Yang, Jing and Wang, Jian and Wang, Keze},
  title     = {Dynamics-Aware Preference Optimization for Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11610--11620},
}
Hierarchical Long Video Understanding with Audiovisual Entity Cohesion and Agentic Search: Xinlei Yin,

Xiulian Peng,

Xiao Li,

Zhiwei Xiong,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Xinlei and Peng, Xiulian and Li, Xiao and Xiong, Zhiwei and Lu, Yan},
  title     = {Hierarchical Long Video Understanding with Audiovisual Entity Cohesion and Agentic Search},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32882--32891},
}
FlowDC: Flow-Based Decoupling-Decay for Complex Image Editing: Yilei Jiang,

Zhen Wang,

Yanghao Wang,

Jun Yu,

Yueting Zhuang,

Jun Xiao,

Long Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Yilei and Wang, Zhen and Wang, Yanghao and Yu, Jun and Zhuang, Yueting and Xiao, Jun and Chen, Long},
  title     = {{FlowDC}: Flow-Based Decoupling-Decay for Complex Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25757--25766},
}
PDCR: Perception-Decomposed Confidence Reward for Vision-Language Reasoning: Hee Suk Yoon,

Eunseop Yoon,

Ji Woo Hong,

SooHwan Eom,

Gwanhyeong Koo,

Mark Hasegawa-Johnson,

Qi Dai,

Chong Luo,

Chang D. Yoo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoon_2026_CVPR,
  author    = {Yoon, Hee Suk and Yoon, Eunseop and Hong, Ji Woo and Eom, SooHwan and Koo, Gwanhyeong and Hasegawa-Johnson, Mark and Dai, Qi and Luo, Chong and Yoo, Chang D.},
  title     = {{PDCR}: Perception-Decomposed Confidence Reward for Vision-Language Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18881--18891},
}
CaptionFormer: Unified Segmentation, Tracking, and Captioning for Spatio-Temporal Objects: Gabriel Fiastre,

Antoine Yang,

Cordelia Schmid; [pdf] [supp]
[bibtex]
@InProceedings{Fiastre_2026_CVPR,
  author    = {Fiastre, Gabriel and Yang, Antoine and Schmid, Cordelia},
  title     = {{CaptionFormer}: Unified Segmentation, Tracking, and Captioning for Spatio-Temporal Objects},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39518--39528},
}
MeanFlow Transformers with Representation Autoencoders: Zheyuan Hu,

Chieh-Hsin Lai,

Ge Wu,

Yuki Mitsufuji,

Stefano Ermon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Zheyuan and Lai, Chieh-Hsin and Wu, Ge and Mitsufuji, Yuki and Ermon, Stefano},
  title     = {{MeanFlow} Transformers with Representation Autoencoders},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25709--25718},
}
WeMMU: Enhanced Bridging of Vision-Language Models and Diffusion Models via Noisy Query Tokens: Jian Yang,

Dacheng Yin,

Xiaoxuan He,

Yong Li,

Fengyun Rao,

Jing Lyu,

Wei Zhai,

Yang Cao,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Jian and Yin, Dacheng and He, Xiaoxuan and Li, Yong and Rao, Fengyun and Lyu, Jing and Zhai, Wei and Cao, Yang and Zha, Zheng-Jun},
  title     = {{WeMMU}: Enhanced Bridging of Vision-Language Models and Diffusion Models via Noisy Query Tokens},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17609--17618},
}
PAM: A Pose-Appearance-Motion Engine for Sim-to-Real HOI Video Generation: Mingju Gao,

Kaisen Yang,

Huan-ang Gao,

Bohan Li,

Ao Ding,

Wenyi Li,

Yangcheng Yu,

Jinkun Liu,

Shaocong Xu,

Yike Niu,

Haohan Chi,

Hao Chen,

Hao Tang,

Yu Zhang,

Li Yi,

Hao Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Mingju and Yang, Kaisen and Gao, Huan-ang and Li, Bohan and Ding, Ao and Li, Wenyi and Yu, Yangcheng and Liu, Jinkun and Xu, Shaocong and Niu, Yike and Chi, Haohan and Chen, Hao and Tang, Hao and Zhang, Yu and Yi, Li and Zhao, Hao},
  title     = {{PAM}: A Pose-Appearance-Motion Engine for Sim-to-Real {HOI} Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15954--15965},
}
CLAY: Conditional Visual Similarity Modulation in Vision-Language Embedding Space: Sohwi Lim,

Lee Hyoseok,

Jungjoon Park,

Tae-Hyun Oh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lim_2026_CVPR,
  author    = {Lim, Sohwi and Lee, Hyoseok and Park, Jungjoon and Oh, Tae-Hyun},
  title     = {{CLAY}: Conditional Visual Similarity Modulation in Vision-Language Embedding Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9679--9688},
}
The Surprising Effectiveness of Noise Pretraining for Implicit Neural Representations: Kushal Vyas,

Alper Kayabasi,

Daniel Kim,

Vishwanath Saragadam,

Ashok Veeraraghavan,

Guha Balakrishnan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vyas_2026_CVPR,
  author    = {Vyas, Kushal and Kayabasi, Alper and Kim, Daniel and Saragadam, Vishwanath and Veeraraghavan, Ashok and Balakrishnan, Guha},
  title     = {The Surprising Effectiveness of Noise Pretraining for Implicit Neural Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6116--6125},
}
Learning from Semantic Dictionaries: Discriminative Codebook Contrastive Learning for Unified Visual Representation and Generation: Imanol G. Estepa,

Jesús M. Rodríguez-de-Vera,

Bhalaji Nagarajan,

Petia Radeva; [pdf] [supp]
[bibtex]
@InProceedings{Estepa_2026_CVPR,
  author    = {Estepa, Imanol G. and Rodr{\'\i}guez-de-Vera, Jes{\'u}s M. and Nagarajan, Bhalaji and Radeva, Petia},
  title     = {Learning from Semantic Dictionaries: Discriminative Codebook Contrastive Learning for Unified Visual Representation and Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22456--22466},
}
ZipMap: Linear-Time Stateful 3D Reconstruction via Test-Time Training: Haian Jin,

Rundi Wu,

Tianyuan Zhang,

Ruiqi Gao,

Jonathan T. Barron,

Noah Snavely,

Aleksander Hołyński; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
  author    = {Jin, Haian and Wu, Rundi and Zhang, Tianyuan and Gao, Ruiqi and Barron, Jonathan T. and Snavely, Noah and Ho{\l}y{\'n}ski, Aleksander},
  title     = {{ZipMap}: Linear-Time Stateful {3D} Reconstruction via Test-Time Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21748--21759},
}
SGDrive: Scene-to-Goal Hierarchical World Cognition for Autonomous Driving: Jingyu Li,

Junjie Wu,

Dongnan Hu,

Xiangkai Huang,

Bin Sun,

Zhihui Hao,

Xianpeng Lang,

Xiatian Zhu,

Li Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jingyu and Wu, Junjie and Hu, Dongnan and Huang, Xiangkai and Sun, Bin and Hao, Zhihui and Lang, Xianpeng and Zhu, Xiatian and Zhang, Li},
  title     = {{SGDrive}: Scene-to-Goal Hierarchical World Cognition for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4032--4042},
}
OnlinePG: Online Open-Vocabulary Panoptic Mapping with 3D Gaussian Splatting: Hongjia Zhai,

Qi Zhang,

Xiaokun Pan,

Xiyu Zhang,

Yitong Dong,

Huaqi Zhang,

Dan Xu,

Guofeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhai_2026_CVPR,
  author    = {Zhai, Hongjia and Zhang, Qi and Pan, Xiaokun and Zhang, Xiyu and Dong, Yitong and Zhang, Huaqi and Xu, Dan and Zhang, Guofeng},
  title     = {{OnlinePG}: Online Open-Vocabulary Panoptic Mapping with {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33269--33279},
}
Are Image-to-Video Models Good Zero-Shot Image Editors?: Zechuan Zhang,

Zhenyuan Chen,

Zongxin Yang,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zechuan and Chen, Zhenyuan and Yang, Zongxin and Yang, Yi},
  title     = {Are Image-to-Video Models Good Zero-Shot Image Editors?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2090--2103},
}
PyraTok: Language-Aligned Pyramidal Tokenizer for Video Understanding and Generation: Onkar Susladkar,

Tushar Prakash,

Adheesh Juvekar,

Kiet A. Nguyen,

Dong-Hwan Jang,

Inderjit S Dhillon,

Ismini Lourentzou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Susladkar_2026_CVPR,
  author    = {Susladkar, Onkar and Prakash, Tushar and Juvekar, Adheesh and Nguyen, Kiet A. and Jang, Dong-Hwan and Dhillon, Inderjit S and Lourentzou, Ismini},
  title     = {{PyraTok}: Language-Aligned Pyramidal Tokenizer for Video Understanding and Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37906--37917},
}
RT-Splatting: Joint Reflection-Transmission Modeling with Gaussian Splatting: Ji Shi,

Xianghua Ying,

Bowei Xing,

Ruohao Guo,

Wenzhen Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Ji and Ying, Xianghua and Xing, Bowei and Guo, Ruohao and Yue, Wenzhen},
  title     = {{RT-Splatting}: Joint Reflection-Transmission Modeling with {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4870--4880},
}
From Observation to Action: Latent Action-based Primitive Segmentation for VLA Pre-training in Industrial Settings: Jiajie Zhang,

Sören Schwertfeger,

Alexander Kleiner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jiajie and Schwertfeger, S{\"o}ren and Kleiner, Alexander},
  title     = {From Observation to Action: Latent Action-based Primitive Segmentation for {VLA} Pre-training in Industrial Settings},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6750--6759},
}
Retrieve and Segment: Are a Few Examples Enough to Bridge the Supervision Gap in Open-Vocabulary Segmentation?: Tilemachos Aravanis,

Vladan Stojnić,

Bill Psomas,

Nikos Komodakis,

Giorgos Tolias; [pdf] [supp]
[bibtex]
@InProceedings{Aravanis_2026_CVPR,
  author    = {Aravanis, Tilemachos and Stojni{\'c}, Vladan and Psomas, Bill and Komodakis, Nikos and Tolias, Giorgos},
  title     = {Retrieve and Segment: Are a Few Examples Enough to Bridge the Supervision Gap in Open-Vocabulary Segmentation?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27621--27632},
}
Shoe Style-Invariant and Ground-Aware Learning for Dense Foot Contact Estimation: Daniel Sungho Jung,

Kyoung Mu Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Daniel Sungho and Lee, Kyoung Mu},
  title     = {Shoe Style-Invariant and Ground-Aware Learning for Dense Foot Contact Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7058--7067},
}
UniGeoSeg: Towards Unified Open-World Segmentation for Geospatial Scenes: Shuo Ni,

Di Wang,

He Chen,

Haonan Guo,

Ning Zhang,

Jing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2026_CVPR,
  author    = {Ni, Shuo and Wang, Di and Chen, He and Guo, Haonan and Zhang, Ning and Zhang, Jing},
  title     = {{UniGeoSeg}: Towards Unified Open-World Segmentation for Geospatial Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34865--34876},
}
ShiftLUT: Spatial Shift Enhanced Look-Up Tables for Efficient Image Restoration: Xiaolong Zeng,

Yitong Yu,

Shiyao Xiong,

Jinhua Hao,

Ming Sun,

Chao Zhou,

Bin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Xiaolong and Yu, Yitong and Xiong, Shiyao and Hao, Jinhua and Sun, Ming and Zhou, Chao and Wang, Bin},
  title     = {{ShiftLUT}: Spatial Shift Enhanced Look-Up Tables for Efficient Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29959--29968},
}
NaTex: Seamless Texture Generation as Latent Color Diffusion: Zeqiang Lai,

Yunfei Zhao,

Zibo Zhao,

Xin Yang,

Xin Huang,

Jingwei Huang,

Xiangyu Yue,

Chunchao Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Zeqiang and Zhao, Yunfei and Zhao, Zibo and Yang, Xin and Huang, Xin and Huang, Jingwei and Yue, Xiangyu and Guo, Chunchao},
  title     = {{NaTex}: Seamless Texture Generation as Latent Color Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18619--18629},
}
HoneyBee: Data Recipes for Vision-Language Reasoners: Hritik Bansal,

Devendra Singh Sachan,

Kai-Wei Chang,

Aditya Grover,

Gargi Ghosh,

Wen-tau Yih,

Ramakanth Pasunuru; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bansal_2026_CVPR,
  author    = {Bansal, Hritik and Sachan, Devendra Singh and Chang, Kai-Wei and Grover, Aditya and Ghosh, Gargi and Yih, Wen-tau and Pasunuru, Ramakanth},
  title     = {{HoneyBee}: Data Recipes for Vision-Language Reasoners},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26262--26273},
}
One-Shot Flow, Any-Time Frame: A Bidirectional Warping Framework for Event-Based Video Frame Interpolation: Linghui Fu,

Yuhan Liu,

Hao Chen,

Zhen Yang,

Yongjian Deng; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Linghui and Liu, Yuhan and Chen, Hao and Yang, Zhen and Deng, Yongjian},
  title     = {One-Shot Flow, Any-Time Frame: A Bidirectional Warping Framework for Event-Based Video Frame Interpolation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2832--2842},
}
Condensed Test-Time Adaptation of VLMs for Action Recognition: Wenxuan Ge,

Hongyu Qu,

Rui Yan,

Guo-Sen Xie,

Yazhou Yao,

Xiangbo Shu,

Jinhui Tang; [pdf] [supp]
[bibtex]
@InProceedings{Ge_2026_CVPR,
  author    = {Ge, Wenxuan and Qu, Hongyu and Yan, Rui and Xie, Guo-Sen and Yao, Yazhou and Shu, Xiangbo and Tang, Jinhui},
  title     = {Condensed Test-Time Adaptation of {VLMs} for Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16977--16987},
}
RI-Mamba: Rotation-Invariant Mamba for Robust Text-to-Shape Retrieval: Khanh Nguyen,

Dasith de Silva Edirimuni,

Ghulam Mubashar Hassan,

Ajmal Mian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Khanh and de Silva Edirimuni, Dasith and Hassan, Ghulam Mubashar and Mian, Ajmal},
  title     = {{RI-Mamba}: Rotation-Invariant {Mamba} for Robust Text-to-Shape Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16834--16844},
}
PointThinker: Point-Incentivized Parallel Thinking for Multimodal Large Language Model: Zhengdong Hu,

Chao Wang,

Fengyun Rao,

Jing LYU,

Hehe Fan,

Yi Yang; [pdf]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Zhengdong and Wang, Chao and Rao, Fengyun and LYU, Jing and Fan, Hehe and Yang, Yi},
  title     = {{PointThinker}: Point-Incentivized Parallel Thinking for Multimodal Large Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26240--26250},
}
Balanced Hierarchical Contrastive Learning with Decoupled Queries for Fine-grained Object Detection in Remote Sensing Images: Jingzhou Chen,

Dexin Chen,

Fengchao Xiong,

Yuntao Qian,

Liang Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jingzhou and Chen, Dexin and Xiong, Fengchao and Qian, Yuntao and Xiao, Liang},
  title     = {Balanced Hierarchical Contrastive Learning with Decoupled Queries for Fine-grained Object Detection in Remote Sensing Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20619--20628},
}
Stand-In: A Lightweight and Plug-and-Play Identity Control for Video Generation: Bowen Xue,

Zheng-Peng Duan,

Qixin Yan,

Wenjing Wang,

Hao Liu,

Chun-Le Guo,

Chongyi Li,

Chen Li,

Jing Lyu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR,
  author    = {Xue, Bowen and Duan, Zheng-Peng and Yan, Qixin and Wang, Wenjing and Liu, Hao and Guo, Chun-Le and Li, Chongyi and Li, Chen and Lyu, Jing},
  title     = {{Stand-In}: A Lightweight and Plug-and-Play Identity Control for Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23314--23324},
}
SEASON: Mitigating Temporal Hallucination in Video Large Language Models via Self-Diagnostic Contrastive Decoding: Chang-Hsun Wu,

Kai-Po Chang,

Yu-Yang Sheng,

Hung-Kai Chung,

Kuei-Chun Wang,

Yu-Chiang Frank Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Chang-Hsun and Chang, Kai-Po and Sheng, Yu-Yang and Chung, Hung-Kai and Wang, Kuei-Chun and Wang, Yu-Chiang Frank},
  title     = {{SEASON}: Mitigating Temporal Hallucination in Video Large Language Models via Self-Diagnostic Contrastive Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11096--11105},
}
VecGlypher: Unified Vector Glyph Generation with Language Models: Xiaoke Huang,

Bhavul Gauri,

Kam Woh Ng,

Tony Ng,

Mengmeng Xu,

Zhiheng Liu,

Weiming Ren,

Zhaochong An,

Zijian Zhou,

Haonan Qiu,

Yuyin Zhou,

Sen He,

Ziheng Wang,

Tao Xiang,

Xiao Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Xiaoke and Gauri, Bhavul and Ng, Kam Woh and Ng, Tony and Xu, Mengmeng and Liu, Zhiheng and Ren, Weiming and An, Zhaochong and Zhou, Zijian and Qiu, Haonan and Zhou, Yuyin and He, Sen and Wang, Ziheng and Xiang, Tao and Han, Xiao},
  title     = {{VecGlypher}: Unified Vector Glyph Generation with Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24373--24383},
}
MMSD3.0: A Multi-Image Benchmark for Real-World Multimodal Sarcasm Detection: Haochen Zhao,

Yuyao Kong,

Yongxiu Xu,

Gaopeng Gou,

Hongbo Xu,

Yubin Wang,

Haoliang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Haochen and Kong, Yuyao and Xu, Yongxiu and Gou, Gaopeng and Xu, Hongbo and Wang, Yubin and Zhang, Haoliang},
  title     = {{MMSD3.0}: A Multi-Image Benchmark for Real-World Multimodal Sarcasm Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37885--37895},
}
Fusion in Your Way: Aligning Image Fusion with Heterogeneous Demands via Direct Preference Optimization: Weijian Su,

Songqian Zhang,

Yuqi Han,

Jian Zhuang,

Yongdong Huang,

Qiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2026_CVPR,
  author    = {Su, Weijian and Zhang, Songqian and Han, Yuqi and Zhuang, Jian and Huang, Yongdong and Zhang, Qiang},
  title     = {Fusion in Your Way: Aligning Image Fusion with Heterogeneous Demands via Direct Preference Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41499--41509},
}
A Training-Free Style-Personalization via SVD-Based Feature Decomposition: Kyoungmin Lee,

Jihun Park,

Jongmin Gim,

Wonhyeok Choi,

Kyumin Hwang,

Jaeyeul Kim,

Sunghoon Im; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Kyoungmin and Park, Jihun and Gim, Jongmin and Choi, Wonhyeok and Hwang, Kyumin and Kim, Jaeyeul and Im, Sunghoon},
  title     = {A Training-Free Style-Personalization via {SVD}-Based Feature Decomposition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {506--516},
}
Beyond the Ground Truth: Enhanced Supervision for Image Restoration: Donghun Ryou,

Inju Ha,

Sanghyeok Chu,

Bohyung Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ryou_2026_CVPR,
  author    = {Ryou, Donghun and Ha, Inju and Chu, Sanghyeok and Han, Bohyung},
  title     = {Beyond the Ground Truth: Enhanced Supervision for Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29949--29958},
}
SineProject: Machine Unlearning for Stable Vision-Language Alignment: Arpit Garg,

Hemanth Saratchandran,

Simon Lucey; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Garg_2026_CVPR,
  author    = {Garg, Arpit and Saratchandran, Hemanth and Lucey, Simon},
  title     = {{SineProject}: Machine Unlearning for Stable Vision-Language Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31735--31745},
}
Unlocking Token Rewards via Training-Free Reward Attribution: Sitong Wu,

Haoru Tan,

Bin Xia,

Xichen Zhang,

Jingyao Li,

Shaofeng Zhang,

Xiaojuan Qi,

Bei Yu,

Jiaya Jia; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Sitong and Tan, Haoru and Xia, Bin and Zhang, Xichen and Li, Jingyao and Zhang, Shaofeng and Qi, Xiaojuan and Yu, Bei and Jia, Jiaya},
  title     = {Unlocking Token Rewards via Training-Free Reward Attribution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5082--5091},
}
Delving Aleatoric Uncertainty in Medical Image Segmentation via Vision Foundation Models: Ruiyang Li,

Fang Liu,

Licheng Jiao,

Xinglin Xie,

Jiayao Hao,

Shuo Li,

Xu Liu,

Jingyi Yang,

Lingling Li,

Puhua Chen,

Wenping Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Ruiyang and Liu, Fang and Jiao, Licheng and Xie, Xinglin and Hao, Jiayao and Li, Shuo and Liu, Xu and Yang, Jingyi and Li, Lingling and Chen, Puhua and Ma, Wenping},
  title     = {Delving Aleatoric Uncertainty in Medical Image Segmentation via Vision Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30011--30020},
}
Language Models Can Explain Visual Features via Steering: Javier Ferrando,

Enrique Lopez-Cuena,

Pablo Agustin Martin-Torres,

Daniel Hinjos,

Anna Arias-Duart,

Dario Garcia-Gasulla; [pdf] [supp]
[bibtex]
@InProceedings{Ferrando_2026_CVPR,
  author    = {Ferrando, Javier and Lopez-Cuena, Enrique and Martin-Torres, Pablo Agustin and Hinjos, Daniel and Arias-Duart, Anna and Garcia-Gasulla, Dario},
  title     = {Language Models Can Explain Visual Features via Steering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38947--38958},
}
PhyOceanCast: Global Ocean Forecasting with Physics-Informed Diffusion: Qixiu Li,

Xiang Zhu,

Xiaoyong Li,

Xiaolong Xu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Qixiu and Zhu, Xiang and Li, Xiaoyong and Xu, Xiaolong},
  title     = {{PhyOceanCast}: Global Ocean Forecasting with Physics-Informed Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23652--23662},
}
PanDA: Unsupervised Domain Adaptation for Multimodal 3D Panoptic Segmentation in Autonomous Driving: Yining Pan,

Shijie Li,

Yuchen Wu,

Xulei Yang,

Na Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Yining and Li, Shijie and Wu, Yuchen and Yang, Xulei and Zhao, Na},
  title     = {{PanDA}: Unsupervised Domain Adaptation for Multimodal {3D} Panoptic Segmentation in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33057--33067},
}
FreeArtGS: Articulated Gaussian Splatting Under Free-moving Scenario: Hang Dai,

Hongwei Fan,

Han Zhang,

Duojin Wu,

Jiyao Zhang,

Hao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Hang and Fan, Hongwei and Zhang, Han and Wu, Duojin and Zhang, Jiyao and Dong, Hao},
  title     = {{FreeArtGS}: Articulated {Gaussian} Splatting Under Free-moving Scenario},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11777--11787},
}
ReCoFuse: Ultra-Robust Image Fusion via Restorative Multi-Modal Diffusion Reciprocal Coupling: Hao Zhang,

Shuhan Yang,

Linfeng Tang,

Xunpeng Yi,

Jiayi Ma; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Hao and Yang, Shuhan and Tang, Linfeng and Yi, Xunpeng and Ma, Jiayi},
  title     = {{ReCoFuse}: Ultra-Robust Image Fusion via Restorative Multi-Modal Diffusion Reciprocal Coupling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33838--33847},
}
GraPHFormer: A Multimodal Graph Persistent Homology Transformer for the Analysis of Neuroscience Morphologies: Uzair Shah,

Marco Agus,

Mahmoud Gamal,

Mahmood Alzubaidi,

Corrado Cali,

Pierre J. Magistretti,

Abdesselam Bouzerdoum,

Mowafa Househ; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shah_2026_CVPR,
  author    = {Shah, Uzair and Agus, Marco and Gamal, Mahmoud and Alzubaidi, Mahmood and Cali, Corrado and Magistretti, Pierre J. and Bouzerdoum, Abdesselam and Househ, Mowafa},
  title     = {{GraPHFormer}: A Multimodal Graph Persistent Homology Transformer for the Analysis of Neuroscience Morphologies},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28224--28233},
}
The Drift Kernel: Why Diffusion Models Change Even When Told Not To: Gokul Srinath Seetha Ram,

Rashmi Elavazhagan; [pdf] [supp]
[bibtex]
@InProceedings{Ram_2026_CVPR,
  author    = {Ram, Gokul Srinath Seetha and Elavazhagan, Rashmi},
  title     = {The Drift Kernel: Why Diffusion Models Change Even When Told Not To},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43281--43289},
}
Accelerating Autoregressive Video Diffusion via History-Guided Cache and Residual Correction: Kepan Nan,

Wangbo Zhao,

Penghao Zhou,

Jun Li,

Zhenheng Yang,

Jian Yang,

Ying Tai; [pdf] [supp]
[bibtex]
@InProceedings{Nan_2026_CVPR,
  author    = {Nan, Kepan and Zhao, Wangbo and Zhou, Penghao and Li, Jun and Yang, Zhenheng and Yang, Jian and Tai, Ying},
  title     = {Accelerating Autoregressive Video Diffusion via History-Guided Cache and Residual Correction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43740--43750},
}
HyperNAS: Enhancing Architecture Representation for NAS Predictor via Hypernetwork: Jindi Lv,

Yuhao Zhou,

Yuxin Tian,

Qing Ye,

Wentao Feng,

Jiancheng Lv; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2026_CVPR,
  author    = {Lv, Jindi and Zhou, Yuhao and Tian, Yuxin and Ye, Qing and Feng, Wentao and Lv, Jiancheng},
  title     = {{HyperNAS}: Enhancing Architecture Representation for {NAS} Predictor via Hypernetwork},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12955--12965},
}
Locate-Then-Examine: Grounded Region Reasoning Improves Detection of AI-Generated Images: Yikun Ji,

Yan Hong,

Bowen Deng,

Jun Lan,

Huijia Zhu,

Weiqiang Wang,

Liqing Zhang,

Jianfu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2026_CVPR,
  author    = {Ji, Yikun and Hong, Yan and Deng, Bowen and Lan, Jun and Zhu, Huijia and Wang, Weiqiang and Zhang, Liqing and Zhang, Jianfu},
  title     = {Locate-Then-Examine: Grounded Region Reasoning Improves Detection of {AI}-Generated Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19165--19175},
}
Act Like a Pathologist: Tissue-Aware Whole Slide Image Reasoning: Wentao Huang,

Weimin Lyu,

Peiliang Lou,

Qingqiao Hu,

Xiaoling Hu,

Shahira Abousamra,

Wenchao Han,

Ruifeng Guo,

Jiawei Zhou,

Chao Chen,

Chen Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Wentao and Lyu, Weimin and Lou, Peiliang and Hu, Qingqiao and Hu, Xiaoling and Abousamra, Shahira and Han, Wenchao and Guo, Ruifeng and Zhou, Jiawei and Chen, Chao and Wang, Chen},
  title     = {Act Like a Pathologist: Tissue-Aware Whole Slide Image Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6972--6981},
}
Dehallu3D: Hallucination-Mitigated 3D Generation from a Single Image via Cyclic View Consistency Refinement: Xiwen Wang,

Shichao Zhang,

Ruowei Wang,

Mao Li,

Chenyu Zhou,

Ji-Zhe Zhou,

Qijun Zhao,

Hailun Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xiwen and Zhang, Shichao and Wang, Ruowei and Li, Mao and Zhou, Chenyu and Zhou, Ji-Zhe and Zhao, Qijun and Zhang, Hailun},
  title     = {{Dehallu3D}: Hallucination-Mitigated {3D} Generation from a Single Image via Cyclic View Consistency Refinement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19993--20002},
}
Does YOLO Really Need to See Every Training Image in Every Epoch?: Xingxing Xie,

Jiahua Dong,

Junwei Han,

Gong Cheng; [pdf] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
    author    = {Xie, Xingxing and Dong, Jiahua and Han, Junwei and Cheng, Gong},
    title     = {Does {YOLO} Really Need to See Every Training Image in Every Epoch?},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {135-144}
}
Personalized Federated Training of Diffusion Models with Privacy Guarantees: Kumar Kshitij Patel,

Bingqing Jiang,

A F M Mahfuzul Kabir,

Weitong Zhang,

Difan Zou,

Lingxiao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Patel_2026_CVPR,
    author    = {Patel, Kumar Kshitij and Jiang, Bingqing and Kabir, A F M Mahfuzul and Zhang, Weitong and Zou, Difan and Wang, Lingxiao},
    title     = {Personalized Federated Training of Diffusion Models with Privacy Guarantees},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {31790-31801}
}
KaLOS finds Consensus: A Meta-Algorithm for Evaluating Inter-Annotator Agreement in Complex Vision Tasks: David Tschirschwitz,

Volker Rodehorst; [pdf] [supp]
[bibtex]
@InProceedings{Tschirschwitz_2026_CVPR,
    author    = {Tschirschwitz, David and Rodehorst, Volker},
    title     = {{KaLOS} finds Consensus: A Meta-Algorithm for Evaluating Inter-Annotator Agreement in Complex Vision Tasks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {38554-38563}
}
Rank-Guided Pseudo-Bias Learning for Robust Black-Box Adaptation: Rajeev Ranjan Dwivedi,

Anshuman Dangwal,

Vinod K Kurmi; [pdf] [supp]
[bibtex]
@InProceedings{Dwivedi_2026_CVPR,
    author    = {Dwivedi, Rajeev Ranjan and Dangwal, Anshuman and Kurmi, Vinod K},
    title     = {Rank-Guided Pseudo-Bias Learning for Robust Black-Box Adaptation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {31683-31692}
}
Learning What Helps: Task-Aligned Context Selection for Vision Tasks: Jingyu Guo,

Emir Konuk,

Fredrik Strand,

Christos Matsoukas,

Kevin Smith; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
    author    = {Guo, Jingyu and Konuk, Emir and Strand, Fredrik and Matsoukas, Christos and Smith, Kevin},
    title     = {Learning What Helps: Task-Aligned Context Selection for Vision Tasks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {11632-11642}
}
TextPecker: Rewarding Structural Anomaly Quantification for Enhancing Visual Text Rendering: Hanshen Zhu,

Yuliang Liu,

Xuecheng Wu,

An-Lan Wang,

Hao Feng,

Dingkang Yang,

Chao Feng,

Can Huang,

Jingqun Tang,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Hanshen and Liu, Yuliang and Wu, Xuecheng and Wang, An-Lan and Feng, Hao and Yang, Dingkang and Feng, Chao and Huang, Can and Tang, Jingqun and Bai, Xiang},
    title     = {{TextPecker}: Rewarding Structural Anomaly Quantification for Enhancing Visual Text Rendering},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {22059-22069}
}
Breaking the Regional Perception Bottleneck of Multimodal Large Language Models via External Reasoning Framework: Jinrong Zhang,

Zhaoyang Xu,

Xusheng He,

Xinrui Li,

Na Zheng,

Jianlong Wu; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Jinrong and Xu, Zhaoyang and He, Xusheng and Li, Xinrui and Zheng, Na and Wu, Jianlong},
    title     = {Breaking the Regional Perception Bottleneck of Multimodal Large Language Models via External Reasoning Framework},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {33531-33541}
}
Scaling Spatial Intelligence with Multimodal Foundation Models: Zhongang Cai,

Ruisi Wang,

Chenyang Gu,

Fanyi Pu,

Junxiang Xu,

Yubo Wang,

Wanqi Yin,

Zhitao Yang,

Chen Wei,

Tongxi Zhou,

Qingping Sun,

Hui En Pang,

Jiaqi Li,

Oscar Qian,

Zhiqian Lin,

Xuanke Shi,

Kewang Deng,

Xiaoyang Han,

Zukai Chen,

Xiangyu Fan,

Hanming Deng,

Lewei Lu,

Liang Pan,

Bo Li,

Ziwei Liu,

Quan Wang,

Dahua Lin,

Lei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
    author    = {Cai, Zhongang and Wang, Ruisi and Gu, Chenyang and Pu, Fanyi and Xu, Junxiang and Wang, Yubo and Yin, Wanqi and Yang, Zhitao and Wei, Chen and Zhou, Tongxi and Sun, Qingping and Pang, Hui En and Li, Jiaqi and Qian, Oscar and Lin, Zhiqian and Shi, Xuanke and Deng, Kewang and Han, Xiaoyang and Chen, Zukai and Fan, Xiangyu and Deng, Hanming and Lu, Lewei and Pan, Liang and Li, Bo and Liu, Ziwei and Wang, Quan and Lin, Dahua and Yang, Lei},
    title     = {Scaling Spatial Intelligence with Multimodal Foundation Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {7879-7890}
}
Semantic Audio-Visual Navigation in Continuous Environments: Yichen Zeng,

Hebaixu Wang,

Meng Liu,

Yu Zhou,

Chen Gao,

Kehan Chen,

Gongping Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
    author    = {Zeng, Yichen and Wang, Hebaixu and Liu, Meng and Zhou, Yu and Gao, Chen and Chen, Kehan and Huang, Gongping},
    title     = {Semantic Audio-Visual Navigation in Continuous Environments},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {22369-22379}
}
Cross-Modal Attention Calibration for LVLM Hallucination Mitigation: Jiaming Li,

Jiacheng Zhang,

Zequn Jie,

Lin Ma,

Ming Li,

Xiaonan Luo,

Guanbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Jiaming and Zhang, Jiacheng and Jie, Zequn and Ma, Lin and Li, Ming and Luo, Xiaonan and Li, Guanbin},
    title     = {Cross-Modal Attention Calibration for {LVLM} Hallucination Mitigation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {40186-40196}
}
MonoSAOD: Monocular 3D Object Detection with Sparsely Annotated Label: Junyoung Jung,

Seokwon Kim,

Jung Uk Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2026_CVPR,
    author    = {Jung, Junyoung and Kim, Seokwon and Kim, Jung Uk},
    title     = {{MonoSAOD}: Monocular {3D} Object Detection with Sparsely Annotated Label},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {4718-4727}
}
Human Geometry Distribution for 3D Animation Generation: Xiangjun Tang,

Biao Zhang,

Peter Wonka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
    author    = {Tang, Xiangjun and Zhang, Biao and Wonka, Peter},
    title     = {Human Geometry Distribution for {3D} Animation Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {38313-38323}
}
Training High-Level Schedulers with Execution-Feedback Reinforcement Learning for Long-Horizon GUI Automation: Zehao Deng,

Tianjie Ju,

Zheng Wu,

Zhuosheng Zhang,

Gongshen Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
    author    = {Deng, Zehao and Ju, Tianjie and Wu, Zheng and Zhang, Zhuosheng and Liu, Gongshen},
    title     = {Training High-Level Schedulers with Execution-Feedback Reinforcement Learning for Long-Horizon {GUI} Automation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {27525-27535}
}
Direct Segmentation without Logits Optimization for Training-Free Open-Vocabulary Semantic Segmentation: Jiahao Li,

Yang Lu,

Yachao Zhang,

Fangyong Wang,

Yuan Xie,

Yanyun Qu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Jiahao and Lu, Yang and Zhang, Yachao and Wang, Fangyong and Xie, Yuan and Qu, Yanyun},
    title     = {Direct Segmentation without Logits Optimization for Training-Free Open-Vocabulary Semantic Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {13168-13178}
}
Moving Border Ownership for Event-based Motion Segmentation: Zhiyuan Hua,

Cornelia Fermüller,

Yiannis Aloimonos; [pdf] [supp]
[bibtex]
% Accent written as {\"u} so BibTeX treats it as a single special character when sorting and recasing names.
@InProceedings{Hua_2026_CVPR,
    author    = {Hua, Zhiyuan and Ferm{\"u}ller, Cornelia and Aloimonos, Yiannis},
    title     = {Moving Border Ownership for Event-based Motion Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {37043-37052}
}
TGTrack: Temporal Generative Learning for Unified Single Object Tracking: Wanting Geng,

Xin Chen,

Chuanyu Sun,

Jie Zhao,

Ben Kang,

Dong Wang,

Huchuan Lu; [pdf] [supp]
[bibtex]
@InProceedings{Geng_2026_CVPR,
    author    = {Geng, Wanting and Chen, Xin and Sun, Chuanyu and Zhao, Jie and Kang, Ben and Wang, Dong and Lu, Huchuan},
    title     = {{TGTrack}: Temporal Generative Learning for Unified Single Object Tracking},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {28134-28144}
}
Towards Visual Query Localization in the 3D World: Liang Peng,

Bohan Tan,

Zhipeng Zhang,

Haobo Li,

Yifan Jiao,

Xingping Dong,

Libo Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
    author    = {Peng, Liang and Tan, Bohan and Zhang, Zhipeng and Li, Haobo and Jiao, Yifan and Dong, Xingping and Zhang, Libo},
    title     = {Towards Visual Query Localization in the {3D} World},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {41406-41415}
}
OrienPose: Orientation-Guided Novel View Synthesis for Single-Image Unseen Object Pose Estimation: Yating Liu,

Zhaoshuai Qi,

Yang Zou,

Yongnan Yang,

Shizhou Zhang,

Yanning Zhang; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Yating and Qi, Zhaoshuai and Zou, Yang and Yang, Yongnan and Zhang, Shizhou and Zhang, Yanning},
    title     = {{OrienPose}: Orientation-Guided Novel View Synthesis for Single-Image Unseen Object Pose Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {26813-26823}
}
What Is It Like to Be a Noise? An Entropy-based Gaussian Noise Regularization for Diffusion Models: Pascal Chang,

Kai Lascheit,

Jingwei Tang,

Markus Gross,

Vinicius C. Azevedo; [pdf] [supp]
[bibtex]
% NOTE(review): the exported author field interleaved each real author with an affiliation/ORCID
% fragment ("Studios\_\_\_ Switzerland" plus an ORCID or "blank") parsed as an extra author.
% Those five pseudo-authors are removed here. ORCIDs as they appeared after each name, presumably
% belonging to the preceding author (confirm against the paper): Chang 0000-0002-8590-8039;
% Lascheit none given; Tang 0009-0000-6005-7808; Gross 0009-0003-9324-779X; Azevedo 0009-0002-4133-4309.
@InProceedings{Chang_2026_CVPR,
    author    = {Chang, Pascal and Lascheit, Kai and Tang, Jingwei and Gross, Markus and Azevedo, Vinicius C.},
    title     = {What Is It Like to Be a Noise? An Entropy-based {Gaussian} Noise Regularization for Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {43471-43481}
}
PhysVid: Physics Aware Local Conditioning for Generative Video Models: Saurabh Pathak,

Elahe Arani,

Mykola Pechenizkiy,

Bahram Zonooz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pathak_2026_CVPR,
    author    = {Pathak, Saurabh and Arani, Elahe and Pechenizkiy, Mykola and Zonooz, Bahram},
    title     = {{PhysVid}: Physics Aware Local Conditioning for Generative Video Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {41847-41858}
}
Towards Motion Turing Test: Evaluating Human-Likeness in Humanoid Robots: Mingzhe Li,

Mengyin Liu,

Zekai Wu,

Xincheng Lin,

Junsheng Zhang,

Ming Yan,

Zengye Xie,

Changwang Zhang,

Chenglu Wen,

Lan Xu,

Siqi Shen,

Cheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Mingzhe and Liu, Mengyin and Wu, Zekai and Lin, Xincheng and Zhang, Junsheng and Yan, Ming and Xie, Zengye and Zhang, Changwang and Wen, Chenglu and Xu, Lan and Shen, Siqi and Wang, Cheng},
    title     = {Towards Motion {Turing} Test: Evaluating Human-Likeness in Humanoid Robots},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {16486-16498}
}
Harnessing Chain-of-Thought Reasoning in Multimodal Large Language Models for Face Anti-Spoofing: Honglu Zhang,

Zhiqin Fang,

Ningning Zhao,

Saihui Hou,

Long Ma,

Renwang Pei,

Zhaofeng He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Honglu and Fang, Zhiqin and Zhao, Ningning and Hou, Saihui and Ma, Long and Pei, Renwang and He, Zhaofeng},
    title     = {Harnessing Chain-of-Thought Reasoning in Multimodal Large Language Models for Face Anti-Spoofing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {33566-33576}
}
CrossHOI: Learning Cross-View Representations for Monocular 3D Human-Object Interaction Reconstruction: Pei Geng,

Shanshan Zhang,

Jian Yang; [pdf] [supp]
[bibtex]
@InProceedings{Geng_2026_CVPR,
    author    = {Geng, Pei and Zhang, Shanshan and Yang, Jian},
    title     = {{CrossHOI}: Learning Cross-View Representations for Monocular {3D} Human-Object Interaction Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {7121-7130}
}
VULCAN: Tool-Augmented Multi Agents for Iterative 3D Object Arrangement: Zhengfei Kuang,

Rui Lin,

Long Zhao,

Gordon Wetzstein,

Saining Xie,

Sanghyun Woo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kuang_2026_CVPR,
    author    = {Kuang, Zhengfei and Lin, Rui and Zhao, Long and Wetzstein, Gordon and Xie, Saining and Woo, Sanghyun},
    title     = {{VULCAN}: Tool-Augmented Multi Agents for Iterative {3D} Object Arrangement},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {23763-23773}
}
ArtLLM: Generating Articulated Assets via 3D LLM: Penghao Wang,

Siyuan Xie,

Hongyu Yan,

Xianghui Yang,

Jingwei Huang,

Chunchao Guo,

Jiayuan Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Penghao and Xie, Siyuan and Yan, Hongyu and Yang, Xianghui and Huang, Jingwei and Guo, Chunchao and Gu, Jiayuan},
    title     = {{ArtLLM}: Generating Articulated Assets via {3D} {LLM}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {34281-34291}
}
DDT: Decoupled Diffusion Transformer: Shuai Wang,

Zhi Tian,

Weilin Huang,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Shuai and Tian, Zhi and Huang, Weilin and Wang, Limin},
    title     = {{DDT}: Decoupled Diffusion Transformer},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {40633-40642}
}
DiT360: High-Fidelity Panoramic Image Generation via Hybrid Training: Haoran Feng,

Dizhe Zhang,

Xiangtai Li,

Bo Du,

Lu Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
    author    = {Feng, Haoran and Zhang, Dizhe and Li, Xiangtai and Du, Bo and Qi, Lu},
    title     = {{DiT360}: High-Fidelity Panoramic Image Generation via Hybrid Training},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {23367-23377}
}
Pose-guided Enriched Feature Learning for Federated-by-camera Person Re-identification: JooHyung Oh,

Minyoung Oh,

Sung Whan Yoon,

Jae-Young Sim; [pdf] [supp]
[bibtex]
@InProceedings{Oh_2026_CVPR,
    author    = {Oh, JooHyung and Oh, Minyoung and Yoon, Sung Whan and Sim, Jae-Young},
    title     = {Pose-guided Enriched Feature Learning for Federated-by-camera Person Re-identification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {40458-40467}
}
ANTS: Adaptive Negative Textual Space Shaping for OOD Detection via Test-Time MLLM Understanding and Reasoning: Wenjie Zhu,

Yabin Zhang,

Xin Jin,

Wenjun Zeng,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Wenjie and Zhang, Yabin and Jin, Xin and Zeng, Wenjun and Zhang, Lei},
    title     = {{ANTS}: Adaptive Negative Textual Space Shaping for {OOD} Detection via Test-Time {MLLM} Understanding and Reasoning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {20-30}
}
HTTM: Head-wise Temporal Token Merging for Faster VGGT: Weitian Wang,

Lukas Meiner,

Rai Shubham,

Cecilia De La Parra,

Akash Kumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Weitian and Meiner, Lukas and Shubham, Rai and De La Parra, Cecilia and Kumar, Akash},
    title     = {{HTTM}: Head-wise Temporal Token Merging for Faster {VGGT}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {26379-26388}
}
SenCache: Accelerating Diffusion Model Inference via Sensitivity-Aware Caching: Yasaman Haghighi,

Alexandre Alahi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Haghighi_2026_CVPR,
    author    = {Haghighi, Yasaman and Alahi, Alexandre},
    title     = {{SenCache}: Accelerating Diffusion Model Inference via Sensitivity-Aware Caching},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {14295-14304}
}
PAI-Bench: A Comprehensive Benchmark For Physical AI: Fengzhe Zhou,

Jiannan Huang,

Jialuo Li,

Deva Ramanan,

Humphrey Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Fengzhe and Huang, Jiannan and Li, Jialuo and Ramanan, Deva and Shi, Humphrey},
    title     = {{PAI-Bench}: A Comprehensive Benchmark For Physical {AI}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {21522-21536}
}
Basis-Oriented Low-rank Transfer for Few-Shot and Test-Time Adaptation: Junghwan Park,

Woojin Cho,

Junhyuk Heo,

Darongsae Kwon,

Kookjin Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
    author    = {Park, Junghwan and Cho, Woojin and Heo, Junhyuk and Kwon, Darongsae and Lee, Kookjin},
    title     = {Basis-Oriented Low-rank Transfer for Few-Shot and Test-Time Adaptation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {860-870}
}
GauMVC: Generative Decoupled Gaussian Representation for Human-centric Multi-view Video Compression: Ruoke Yan,

Mingjia Yang,

Xinfeng Zhang,

Haocheng Tang,

Qian Yin,

Zhipin Deng,

Kai Zhang,

Li Zhang,

Siwei Ma; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2026_CVPR,
    author    = {Yan, Ruoke and Yang, Mingjia and Zhang, Xinfeng and Tang, Haocheng and Yin, Qian and Deng, Zhipin and Zhang, Kai and Zhang, Li and Ma, Siwei},
    title     = {{GauMVC}: Generative Decoupled {Gaussian} Representation for Human-centric Multi-view Video Compression},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {4963-4972}
}
Intrinsic Geometry-Appearance Consistency Optimization for Sparse-View Gaussian Splatting: Kaiqiang Xiong,

Rui Peng,

Jiahao Wu,

Zhanke Wang,

Jie Liang,

Xiaoyun Zheng,

Feng Gao,

Ronggang Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
    author    = {Xiong, Kaiqiang and Peng, Rui and Wu, Jiahao and Wang, Zhanke and Liang, Jie and Zheng, Xiaoyun and Gao, Feng and Wang, Ronggang},
    title     = {Intrinsic Geometry-Appearance Consistency Optimization for Sparse-View {Gaussian} Splatting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {40918-40928}
}
ArtHOI: Taming Foundation Models for Monocular 4D Reconstruction of Hand-Articulated-Object Interactions: Zikai Wang,

Zhilu Zhang,

Yiqing Wang,

Hui Li,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Zikai and Zhang, Zhilu and Wang, Yiqing and Li, Hui and Zuo, Wangmeng},
    title     = {{ArtHOI}: Taming Foundation Models for Monocular {4D} Reconstruction of Hand-Articulated-Object Interactions},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {15998-16009}
}
SpatialStack: Layered Geometry-Language Fusion for 3D VLM Spatial Reasoning: Jian Zhang,

Shijie Zhou,

Bangya Liu,

Achuta Kadambi,

Zhiwen Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Jian and Zhou, Shijie and Liu, Bangya and Kadambi, Achuta and Fan, Zhiwen},
    title     = {{SpatialStack}: Layered Geometry-Language Fusion for {3D} {VLM} Spatial Reasoning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {38678-38688}
}
Hint2Gen: Bridging Understanding and Generation via Code-structured Hints: Yuanpeng Tu,

Yunpeng Chen,

Xi Chen,

Liang Li,

Hengshuang Zhao; [pdf]
[bibtex]
@InProceedings{Tu_2026_CVPR,
    author    = {Tu, Yuanpeng and Chen, Yunpeng and Chen, Xi and Li, Liang and Zhao, Hengshuang},
    title     = {{Hint2Gen}: Bridging Understanding and Generation via Code-structured Hints},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {36593-36603}
}
Goldilocks Test Sets for Face Verification: Haiyu Wu,

Sicong Tian,

Aman Bhatta,

Jacob Gutierrez,

Grace Bezold,

Genesis Argueta,

Karl Ricanek,

Michael C. King,

Kevin Bowyer; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Haiyu and Tian, Sicong and Bhatta, Aman and Gutierrez, Jacob and Bezold, Grace and Argueta, Genesis and Ricanek, Karl and King, Michael C. and Bowyer, Kevin},
    title     = {Goldilocks Test Sets for Face Verification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {35504-35513}
}
Rethinking Occlusion Modeling for UAV Tracking: Jian Zhang,

Xincheng Yu,

Yi Lin; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Jian and Yu, Xincheng and Lin, Yi},
    title     = {Rethinking Occlusion Modeling for {UAV} Tracking},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {13563-13573}
}
LRDUN: A Low-Rank Deep Unfolding Network for Efficient Spectral Compressive Imaging: He Huang,

Yujun Guo,

Wei He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, He and Guo, Yujun and He, Wei},
    title     = {{LRDUN}: A Low-Rank Deep Unfolding Network for Efficient Spectral Compressive Imaging},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {10556-10566}
}
Exploring Spatial Intelligence from a Generative Perspective: Muzhi Zhu,

Shunyao Jiang,

Huanyi Zheng,

Zekai Luo,

Hao Zhong,

Anzhou Li,

Kaijun Wang,

Jintao Rong,

Yang Liu,

Hao Chen,

Tao Lin,

Chunhua Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Muzhi and Jiang, Shunyao and Zheng, Huanyi and Luo, Zekai and Zhong, Hao and Li, Anzhou and Wang, Kaijun and Rong, Jintao and Liu, Yang and Chen, Hao and Lin, Tao and Shen, Chunhua},
    title     = {Exploring Spatial Intelligence from a Generative Perspective},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {2582-2592}
}
The Blind Spot of Adaptation: Quantifying and Mitigating Forgetting in Fine-tuned Driving Models: Runhao Mao,

Hanshi Wang,

Yixiang Yang,

Qianli Ma,

Jingmeng Zhou,

Zhipeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2026_CVPR,
    author    = {Mao, Runhao and Wang, Hanshi and Yang, Yixiang and Ma, Qianli and Zhou, Jingmeng and Zhang, Zhipeng},
    title     = {The Blind Spot of Adaptation: Quantifying and Mitigating Forgetting in Fine-tuned Driving Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {10621-10631}
}
Abstract 3D Perception for Spatial Intelligence in Vision-Language Models: Yifan Liu,

Fangneng Zhan,

Kaichen Zhou,

Yilun Du,

Paul Pu Liang,

Hanspeter Pfister; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Yifan and Zhan, Fangneng and Zhou, Kaichen and Du, Yilun and Liang, Paul Pu and Pfister, Hanspeter},
    title     = {Abstract {3D} Perception for Spatial Intelligence in Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {38647-38656}
}
Learning to Infer Parameterized Representations of Plants from 3D Scans: Samara Ghrer,

Christophe Godin,

Stefanie Wuhrer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghrer_2026_CVPR,
    author    = {Ghrer, Samara and Godin, Christophe and Wuhrer, Stefanie},
    title     = {Learning to Infer Parameterized Representations of Plants from {3D} Scans},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {42236-42245}
}
Investigating Self-Supervised Representations for Audio-Visual Deepfake Detection: Dragos-Alexandru Boldisor,

Stefan Smeu,

Dan Oneata,

Elisabeta Oneata; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Boldisor_2026_CVPR,
    author    = {Boldisor, Dragos-Alexandru and Smeu, Stefan and Oneata, Dan and Oneata, Elisabeta},
    title     = {Investigating Self-Supervised Representations for Audio-Visual Deepfake Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {43018-43029}
}
CLP: A Real-World Dataset of Contaminated Lens Protectors for Robust Semantic Segmentation: Sungyong Park,

Sooyoung Choi,

Hyunsuh Koh,

Youngjae Choi,

Heewon Kim; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
    author    = {Park, Sungyong and Choi, Sooyoung and Koh, Hyunsuh and Choi, Youngjae and Kim, Heewon},
    title     = {{CLP}: A Real-World Dataset of Contaminated Lens Protectors for Robust Semantic Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {3794-3804}
}
TF-SSD: A Strong Pipeline via Synergic Mask Filter for Training-free Co-salient Object Detection: Zhijin He,

Shuo Jin,

Siyue Yu,

Shuwei Wu,

Bingfeng Zhang,

Li Yu,

Jimin Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
    author    = {He, Zhijin and Jin, Shuo and Yu, Siyue and Wu, Shuwei and Zhang, Bingfeng and Yu, Li and Xiao, Jimin},
    title     = {{TF-SSD}: A Strong Pipeline via Synergic Mask Filter for Training-free Co-salient Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {32216-32225}
}
QueryOcc: Query-based Self-Supervision for 3D Semantic Occupancy: Adam Lilja,

Ji Lan,

Junsheng Fu,

Lars Hammarstrand; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lilja_2026_CVPR,
    author    = {Lilja, Adam and Lan, Ji and Fu, Junsheng and Hammarstrand, Lars},
    title     = {{QueryOcc}: Query-based Self-Supervision for {3D} Semantic Occupancy},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {21399-21408}
}
Structure-to-Intensity Diffusion for Adverse-Weather LiDAR Generation: Peiyang Ni,

Longyu Yang,

Lu Zhang,

Kuniaki Saito,

Yap-Peng Tan,

Fumin Shen,

Heng Tao Shen,

Xiaofeng Zhu,

Ping Hu; [pdf] [supp]
[bibtex]
@InProceedings{Ni_2026_CVPR,
    author    = {Ni, Peiyang and Yang, Longyu and Zhang, Lu and Saito, Kuniaki and Tan, Yap-Peng and Shen, Fumin and Shen, Heng Tao and Zhu, Xiaofeng and Hu, Ping},
    title     = {Structure-to-Intensity Diffusion for Adverse-Weather {LiDAR} Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {35904-35914}
}
InstAP: Instance-Aware Vision-Language Pre-Train for Spatial-Temporal Understanding: Ashutosh Kumar,

Rajat Saini,

Jingjing Pan,

Mustafa Erdogan,

Mingfang Zhang,

Betty Le Dem,

Norimasa Kobori,

Quan Kong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kumar_2026_CVPR,
    author    = {Kumar, Ashutosh and Saini, Rajat and Pan, Jingjing and Erdogan, Mustafa and Zhang, Mingfang and Le Dem, Betty and Kobori, Norimasa and Kong, Quan},
    title     = {{InstAP}: Instance-Aware Vision-Language Pre-Train for Spatial-Temporal Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {3079-3090}
}
AV-Reasoner: Improving and Benchmarking Clue-Grounded Audio-Visual Counting for MLLMs: Lidong Lu,

Guo Chen,

Zhu Wei,

Zhiqi Li,

Yicheng Liu,

Tong Lu; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
    author    = {Lu, Lidong and Chen, Guo and Wei, Zhu and Li, Zhiqi and Liu, Yicheng and Lu, Tong},
    title     = {{AV-Reasoner}: Improving and Benchmarking Clue-Grounded Audio-Visual Counting for {MLLMs}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {33477-33487}
}
HDW-SR: High-Frequency Guided Diffusion Model based on Wavelet Decomposition for Image Super-Resolution: Chao Yang,

Boqian Zhang,

Jinghao Xu,

Guang Jiang; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Chao and Zhang, Boqian and Xu, Jinghao and Jiang, Guang},
    title     = {{HDW-SR}: High-Frequency Guided Diffusion Model based on Wavelet Decomposition for Image Super-Resolution},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {23462-23472}
}
WRIVINDER: Towards Spatial Intelligence for Geo-locating Ground Images onto Satellite Imagery: Chandrakanth Gudavalli,

Tajuddin Manhar Mohammed,

Abhay Yadav,

Ananth Vishnu Bhaskar,

Hardik Prajapati,

Cheng Peng,

Rama Chellappa,

Shivkumar Chandrasekaran,

B.S. Manjunath; [pdf] [supp] [arXiv]
[bibtex]
% Initials written as "B. S." (space-separated) so BibTeX parses them as two given-name tokens.
@InProceedings{Gudavalli_2026_CVPR,
    author    = {Gudavalli, Chandrakanth and Mohammed, Tajuddin Manhar and Yadav, Abhay and Bhaskar, Ananth Vishnu and Prajapati, Hardik and Peng, Cheng and Chellappa, Rama and Chandrasekaran, Shivkumar and Manjunath, B. S.},
    title     = {{WRIVINDER}: Towards Spatial Intelligence for Geo-locating Ground Images onto Satellite Imagery},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {33703-33713}
}
WaDi: Weight Direction-aware Distillation for One-step Image Synthesis: Lei Wang,

Yang Cheng,

Senmao Li,

Ge Wu,

Yaxing Wang,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Lei and Cheng, Yang and Li, Senmao and Wu, Ge and Wang, Yaxing and Yang, Jian},
  title     = {{WaDi}: Weight Direction-aware Distillation for One-step Image Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5574--5584},
}
Polarization State Tracing for Reflection Removal and Color-Consistent Reconstruction: Dongyue Wang,

Yang Lu,

Jiandong Tian; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Dongyue and Lu, Yang and Tian, Jiandong},
  title     = {Polarization State Tracing for Reflection Removal and Color-Consistent Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5680--5689},
}
VIAFormer: Voxel-Image Alignment Transformer for High-Fidelity Voxel Refinement: Tiancheng Fang,

Bowen Pan,

Lingxi Chen,

Jiangjing Lyu,

Chengfei Lv,

Chaoyue Niu,

Fan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Tiancheng and Pan, Bowen and Chen, Lingxi and Lyu, Jiangjing and Lv, Chengfei and Niu, Chaoyue and Wu, Fan},
  title     = {{VIAFormer}: Voxel-Image Alignment Transformer for High-Fidelity Voxel Refinement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29060--29070},
}
MV-TAP: Tracking Any Point in Multi-View Videos: Jahyeok Koo,

Inès Hyeonsu Kim,

Mungyeom Kim,

Junghyun Park,

Seohyeon Park,

Jaeyeong Kim,

Jung Yi,

Seokju Cho,

Seungryong Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koo_2026_CVPR,
  author    = {Koo, Jahyeok and Kim, In{\`e}s Hyeonsu and Kim, Mungyeom and Park, Junghyun and Park, Seohyeon and Kim, Jaeyeong and Yi, Jung and Cho, Seokju and Kim, Seungryong},
  title     = {{MV-TAP}: Tracking Any Point in Multi-View Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20932--20941},
}
MHopReg: Efficient Hierarchical Multi-Hop Graph Search for Point Cloud Registration: Yue Wu,

Feng Xiao,

Yongzhe Yuan,

Hao Li,

Kaiyuan Feng,

Maoguo Gong,

Qiguang Miao,

Wenping Ma; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yue and Xiao, Feng and Yuan, Yongzhe and Li, Hao and Feng, Kaiyuan and Gong, Maoguo and Miao, Qiguang and Ma, Wenping},
  title     = {{MHopReg}: Efficient Hierarchical Multi-Hop Graph Search for Point Cloud Registration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24217--24226},
}
PPISP: Physically-Plausible Compensation and Control of Photometric Variations in Radiance Field Reconstruction: Isaac Deutsch,

Nicolas Moënne-Loccoz,

Gavriel State,

Zan Gojcic; [pdf] [supp]
[bibtex]
@InProceedings{Deutsch_2026_CVPR,
  author    = {Deutsch, Isaac and Mo{\"e}nne-Loccoz, Nicolas and State, Gavriel and Gojcic, Zan},
  title     = {{PPISP}: Physically-Plausible Compensation and Control of Photometric Variations in Radiance Field Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7289--7298},
}
MiniCPM-V 4.5: Cooking Efficient MLLMs via Architecture, Data, and Training Recipe: Tianyu Yu,

Zefan Wang,

Chongyi Wang,

Fuwei Huang,

Wenshuo Ma,

Zhihui He,

Tianchi Cai,

Weize Chen,

Yuxiang Huang,

Ranchi Zhao,

Bokai Xu,

Junbo Cui,

Yingjing Xu,

Liqing Ruan,

Luoyuan Zhang,

Hanyu Liu,

Jingkun Tang,

Hongyuan Liu,

Qining Guo,

Wenhao Hu,

Bingxiang He,

Jie Zhou,

Jie Cai,

Ji Qi,

Zonghao Guo,

Chi Chen,

Guoyang Zeng,

Yuxuan Li,

Ganqu Cui,

Ning Ding,

Xu Han,

Yuan Yao,

Zhiyuan Liu,

Maosong Sun; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Tianyu and Wang, Zefan and Wang, Chongyi and Huang, Fuwei and Ma, Wenshuo and He, Zhihui and Cai, Tianchi and Chen, Weize and Huang, Yuxiang and Zhao, Ranchi and Xu, Bokai and Cui, Junbo and Xu, Yingjing and Ruan, Liqing and Zhang, Luoyuan and Liu, Hanyu and Tang, Jingkun and Liu, Hongyuan and Guo, Qining and Hu, Wenhao and He, Bingxiang and Zhou, Jie and Cai, Jie and Qi, Ji and Guo, Zonghao and Chen, Chi and Zeng, Guoyang and Li, Yuxuan and Cui, Ganqu and Ding, Ning and Han, Xu and Yao, Yuan and Liu, Zhiyuan and Sun, Maosong},
  title     = {{MiniCPM-V} 4.5: Cooking Efficient {MLLMs} via Architecture, Data, and Training Recipe},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11704--11715},
}
EVATok: Adaptive Length Video Tokenization for Efficient Visual Autoregressive Generation: Tianwei Xiong,

Jun Hao Liew,

Zilong Huang,

Zhijie Lin,

Jiashi Feng,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Tianwei and Liew, Jun Hao and Huang, Zilong and Lin, Zhijie and Feng, Jiashi and Liu, Xihui},
  title     = {{EVATok}: Adaptive Length Video Tokenization for Efficient Visual Autoregressive Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23249--23259},
}
End-to-End Language-Action Model for Humanoid Whole Body Control: Yuxuan Wang,

Haobin Jiang,

Shiqing Yao,

Ziluo Ding,

Zongqing Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuxuan and Jiang, Haobin and Yao, Shiqing and Ding, Ziluo and Lu, Zongqing},
  title     = {End-to-End Language-Action Model for Humanoid Whole Body Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38399--38409},
}
Self-Diffusion Driven Blind Imaging: Yanlong Yang,

Guanxiong Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yanlong and Luo, Guanxiong},
  title     = {Self-Diffusion Driven Blind Imaging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26711--26720},
}
Delta Rectified Flow Sampling for Text-to-Image Editing: Gaspard Beaudouin,

Minghan Li,

Jaeyeon Kim,

Sung-Hoon Yoon,

Mengyu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Beaudouin_2026_CVPR,
  author    = {Beaudouin, Gaspard and Li, Minghan and Kim, Jaeyeon and Yoon, Sung-Hoon and Wang, Mengyu},
  title     = {Delta Rectified Flow Sampling for Text-to-Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18662--18672},
}
Multi-level Causal LLM-based Text-to-Motion Generation with Human Alignment: Xiaodong Chen,

Qian Bao,

Xudong Liu,

Jianping Fang,

Jintao Fang,

Yongdong Zhang,

Tao Mei,

Wu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xiaodong and Bao, Qian and Liu, Xudong and Fang, Jianping and Fang, Jintao and Zhang, Yongdong and Mei, Tao and Liu, Wu},
  title     = {Multi-level Causal {LLM-based} Text-to-Motion Generation with Human Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9342--9351},
}
EgoProx: Evaluating MLLMs on Egocentric 3D Proximity Reasoning Across a Cognitive Hierarchy: Jinzhao Li,

Yinuo Chen,

Dongxu Piao,

Panwang Pan,

Yifan Yu,

Dong Wang,

Honglei Yan,

Liang Yue,

Shaofei Wang,

Yixin Chen,

Siyuan Huang,

Miao Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jinzhao and Chen, Yinuo and Piao, Dongxu and Pan, Panwang and Yu, Yifan and Wang, Dong and Yan, Honglei and Yue, Liang and Wang, Shaofei and Chen, Yixin and Huang, Siyuan and Liu, Miao},
  title     = {{EgoProx}: Evaluating {MLLMs} on Egocentric {3D} Proximity Reasoning Across a Cognitive Hierarchy},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23751--23762},
}
IDESplat: Iterative Depth Probability Estimation for Generalizable 3D Gaussian Splatting: Wei Long,

Haifeng Wu,

Shiyin Jiang,

Jinhua Zhang,

Xinchun Ji,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Long_2026_CVPR,
  author    = {Long, Wei and Wu, Haifeng and Jiang, Shiyin and Zhang, Jinhua and Ji, Xinchun and Gu, Shuhang},
  title     = {{IDESplat}: Iterative Depth Probability Estimation for Generalizable {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33248--33258},
}
Reliable Policy Transfer for Safety-Aware End-to-End Driving with Deep Reinforcement Learning: Uddin Md. Borhan,

Arif Raza,

Zhiliang Lin,

Lu Wang,

Jianqiang Li,

Jie Chen; [pdf] [supp]
[bibtex]
@InProceedings{Borhan_2026_CVPR,
  author    = {Borhan, Uddin Md. and Raza, Arif and Lin, Zhiliang and Wang, Lu and Li, Jianqiang and Chen, Jie},
  title     = {Reliable Policy Transfer for Safety-Aware End-to-End Driving with Deep Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32134--32143},
}
LacTokGen: Latent Consistency Tokenizer for 1024-pixel Image Generation by 256 Tokens: Qingsong Xie,

Luyuan Zhang,

Zhao Zhang,

Siyuan Li,

Zhe Huang,

Zhenyu Yang,

Haonan Lu; [pdf]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Qingsong and Zhang, Luyuan and Zhang, Zhao and Li, Siyuan and Huang, Zhe and Yang, Zhenyu and Lu, Haonan},
  title     = {{LacTokGen}: Latent Consistency Tokenizer for 1024-pixel Image Generation by 256 Tokens},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30368--30380},
}
ForeHOI: Feed-forward 3D Object Reconstruction from Daily Hand-Object Interaction Videos: Yuantao Chen,

Jiahao Chang,

Chongjie Ye,

Chaoran Zhang,

Zhaojie Fang,

Chenghong Li,

Xiaoguang Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yuantao and Chang, Jiahao and Ye, Chongjie and Zhang, Chaoran and Fang, Zhaojie and Li, Chenghong and Han, Xiaoguang},
  title     = {{ForeHOI}: Feed-forward {3D} Object Reconstruction from Daily Hand-Object Interaction Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8868--8879},
}
BEV-SLD: Self-Supervised Scene Landmark Detection for Global Localization with LiDAR Bird's-Eye View Images: David Skuddis,

Vincent Ress,

Wei Zhang,

Vincent Ofosu Nyako,

Norbert Haala; [pdf] [supp]
[bibtex]
@InProceedings{Skuddis_2026_CVPR,
  author    = {Skuddis, David and Ress, Vincent and Zhang, Wei and Nyako, Vincent Ofosu and Haala, Norbert},
  title     = {{BEV-SLD}: Self-Supervised Scene Landmark Detection for Global Localization with {LiDAR} Bird's-Eye View Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31400--31409},
}
Learning Generalizable 3D Medical Image Representations from Mask-Guided Self-Supervision: Yunhe Gao,

Yabin Zhang,

Chong Wang,

Jiaming Liu,

Maya Varma,

Jean-Benoit Delbrouck,

Akshay Chaudhari,

Curtis Langlotz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yunhe and Zhang, Yabin and Wang, Chong and Liu, Jiaming and Varma, Maya and Delbrouck, Jean-Benoit and Chaudhari, Akshay and Langlotz, Curtis},
  title     = {Learning Generalizable {3D} Medical Image Representations from Mask-Guided Self-Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13744--13754},
}
ST4R-Splat: Spatio-Temporal Referring Segmentation in 4D Gaussian Splatting: Yuming Meng,

Dong Wu,

Hongbin Zha; [pdf]
[bibtex]
@InProceedings{Meng_2026_CVPR,
  author    = {Meng, Yuming and Wu, Dong and Zha, Hongbin},
  title     = {{ST4R-Splat}: Spatio-Temporal Referring Segmentation in {4D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17598--17608},
}
Dejavu: Towards Experience Feedback Learning for Embodied Intelligence: Shaokai Wu,

Yanbiao Ji,

Qiuchang Li,

Zhiyi Zhang,

Qichen He,

Wenyuan Xie,

Guodong Zhang,

Bayram Bayramli,

Yue Ding,

Hongtao Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Shaokai and Ji, Yanbiao and Li, Qiuchang and Zhang, Zhiyi and He, Qichen and Xie, Wenyuan and Zhang, Guodong and Bayramli, Bayram and Ding, Yue and Lu, Hongtao},
  title     = {{Dejavu}: Towards Experience Feedback Learning for Embodied Intelligence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29578--29587},
}
Toward Diffusible High-Dimensional Latent Spaces: A Frequency Perspective: Bolin Lai,

XuDong Wang,

Saketh Rambhatla,

James M. Rehg,

Zsolt Kira,

Rohit Girdhar,

Ishan Misra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Bolin and Wang, XuDong and Rambhatla, Saketh and Rehg, James M. and Kira, Zsolt and Girdhar, Rohit and Misra, Ishan},
  title     = {Toward Diffusible High-Dimensional Latent Spaces: A Frequency Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43450--43460},
}
Archon: A Unified Multimodal Model for Holistic Digital Human Generation: Chong Bao,

Shichen Liu,

Lijun Yu,

David Futschik,

Stylianos Moschoglou,

Shefali Srivastava,

Ziqian Bai,

Feitong Tan,

Guofeng Zhang,

Zhaopeng Cui,

Sean Fanello,

Yinda Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Bao_2026_CVPR,
  author    = {Bao, Chong and Liu, Shichen and Yu, Lijun and Futschik, David and Moschoglou, Stylianos and Srivastava, Shefali and Bai, Ziqian and Tan, Feitong and Zhang, Guofeng and Cui, Zhaopeng and Fanello, Sean and Zhang, Yinda},
  title     = {{Archon}: A Unified Multimodal Model for Holistic Digital Human Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16464--16474},
}
Suppressing Non-Semantic Noise in Masked Image Modeling Representations: Martine Hjelkrem-Tan,

Marius Aasan,

Rwiddhi Chakraborty,

Gabriel Y. Arteaga,

Changkyu Choi,

Adín Ramírez Rivera; [pdf] [supp]
[bibtex]
@InProceedings{Hjelkrem-Tan_2026_CVPR,
  author    = {Hjelkrem-Tan, Martine and Aasan, Marius and Chakraborty, Rwiddhi and Arteaga, Gabriel Y. and Choi, Changkyu and Rivera, Ad{\'\i}n Ram{\'\i}rez},
  title     = {Suppressing Non-Semantic Noise in Masked Image Modeling Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19319--19329},
}
CORE: Compact Object-centric REpresentations as a New Paradigm for Token Merging in LVLMs: Jingyu Lei,

Gaoang Wang,

Der-Horng Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2026_CVPR,
  author    = {Lei, Jingyu and Wang, Gaoang and Lee, Der-Horng},
  title     = {{CORE}: Compact Object-centric {REpresentations} as a New Paradigm for Token Merging in {LVLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39593--39605},
}
Harmonized Feature Conditioning and Frequency-Prompt Personalization for Multi-Rater Medical Segmentation: Sanaz Karimijafarbigloo,

Armin Khosravi,

Alireza Kheyrkhah,

Reza Azad,

Mauricio Reyes,

Dorit Merhof; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karimijafarbigloo_2026_CVPR,
  author    = {Karimijafarbigloo, Sanaz and Khosravi, Armin and Kheyrkhah, Alireza and Azad, Reza and Reyes, Mauricio and Merhof, Dorit},
  title     = {Harmonized Feature Conditioning and Frequency-Prompt Personalization for Multi-Rater Medical Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22794--22803},
}
Velox: Learning Representations of 4D Geometry and Appearance: Anagh Malik,

Dorian Chan,

Xiaoming Zhao,

David B. Lindell,

Oncel Tuzel,

Jen-Hao Rick Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Malik_2026_CVPR,
  author    = {Malik, Anagh and Chan, Dorian and Zhao, Xiaoming and Lindell, David B. and Tuzel, Oncel and Chang, Jen-Hao Rick},
  title     = {{Velox}: Learning Representations of {4D} Geometry and Appearance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19894--19906},
}
Beyond Matching to Tiles: Bridging Unaligned Aerial and Satellite Views for Vision-Only UAV Navigation: Kejia Liu,

Haoyang Zhou,

Ruoyu Xu,

Peicheng Wang,

Mingli Song,

Haofei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Kejia and Zhou, Haoyang and Xu, Ruoyu and Wang, Peicheng and Song, Mingli and Zhang, Haofei},
  title     = {Beyond Matching to Tiles: Bridging Unaligned Aerial and Satellite Views for Vision-Only {UAV} Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5359--5368},
}
TRM-VLA: Temporal-Aware Chain-of-Thought Reasoning and Memorization for Vision-Language-Action Models: Xiang Li,

Ya-Li Li,

Yuan Wang,

Shengjin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xiang and Li, Ya-Li and Wang, Yuan and Wang, Shengjin},
  title     = {{TRM-VLA}: Temporal-Aware Chain-of-Thought Reasoning and Memorization for Vision-Language-Action Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10943--10953},
}
Make it SING: Analyzing Semantic Invariants in Classifiers: Harel Yadid,

Meir Yossef Levi,

Roy Betser,

Guy Gilboa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yadid_2026_CVPR,
  author    = {Yadid, Harel and Levi, Meir Yossef and Betser, Roy and Gilboa, Guy},
  title     = {Make it {SING}: Analyzing Semantic Invariants in Classifiers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9911--9920},
}
PRISM: Prototype-based Reasoning with Inter-modal Semantic Mining for Interpretable Image Recognition: Anni Yu,

Yu-Bin Yang; [pdf]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Anni and Yang, Yu-Bin},
  title     = {{PRISM}: Prototype-based Reasoning with Inter-modal Semantic Mining for Interpretable Image Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2853--2863},
}
Aligning Text, Images and 3D Structure Token-by-Token: Aadarsh Sahoo,

Vansh Tibrewal,

Georgia Gkioxari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sahoo_2026_CVPR,
  author    = {Sahoo, Aadarsh and Tibrewal, Vansh and Gkioxari, Georgia},
  title     = {Aligning Text, Images and {3D} Structure Token-by-Token},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14905--14914},
}
Event Structural Valley: A Unified Theoretical and Practical Framework for Event Camera Autofocus: Xijie Xiang,

Lin Zhu,

Wei Zhang,

Yonghong Tian; [pdf] [supp]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Xijie and Zhu, Lin and Zhang, Wei and Tian, Yonghong},
  title     = {Event Structural Valley: A Unified Theoretical and Practical Framework for Event Camera Autofocus},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {936--945},
}
Linking Perception, Confidence and Accuracy in MLLMs: Yuetian Du,

Yucheng Wang,

Rongyu Zhang,

Zhijie Xu,

Boyu Yang,

Ming Kong,

Jie Liu,

Qiang Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Yuetian and Wang, Yucheng and Zhang, Rongyu and Xu, Zhijie and Yang, Boyu and Kong, Ming and Liu, Jie and Zhu, Qiang},
  title     = {Linking Perception, Confidence and Accuracy in {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25914--25924},
}
Diagram2Structure: Unlocking LLMs' Diagram Comprehension through DiagramDiff, a Framework for Structuring Offline Diagrams: Haoxiang Hu,

Yaokun Li,

Zeyuan Huang,

Cangjun Gao,

Qiang He,

Qingkun Li,

Xiaoming Deng,

Cuixia Ma,

Yu-Kun Lai,

Yong-Jin Liu,

Hongan Wang; [pdf]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Haoxiang and Li, Yaokun and Huang, Zeyuan and Gao, Cangjun and He, Qiang and Li, Qingkun and Deng, Xiaoming and Ma, Cuixia and Lai, Yu-Kun and Liu, Yong-Jin and Wang, Hongan},
  title     = {{Diagram2Structure}: Unlocking {LLMs'} Diagram Comprehension through {DiagramDiff}, a Framework for Structuring Offline Diagrams},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24395--24404},
}
BiMotion: B-spline Motion for Text-guided Dynamic 3D Character Generation: Miaowei Wang,

Qingxuan Yan,

Zhi Cao,

Yayuan Li,

Oisin Mac Aodha,

Jason J Corso,

Amir Vaxman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Miaowei and Yan, Qingxuan and Cao, Zhi and Li, Yayuan and Mac Aodha, Oisin and Corso, Jason J and Vaxman, Amir},
  title     = {{BiMotion}: {B-spline} Motion for Text-guided Dynamic {3D} Character Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10152--10164},
}
AVA-Bench: Atomic Visual Ability Benchmark for Vision Foundation Models: Zheda Mai,

Arpita Chowdhury,

Zihe Wang,

Sooyoung Jeon,

Lemeng Wang,

Jiacheng Hou,

Jihyung Kil,

Wei-Lun Chao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mai_2026_CVPR,
  author    = {Mai, Zheda and Chowdhury, Arpita and Wang, Zihe and Jeon, Sooyoung and Wang, Lemeng and Hou, Jiacheng and Kil, Jihyung and Chao, Wei-Lun},
  title     = {{AVA-Bench}: Atomic Visual Ability Benchmark for Vision Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25925--25937},
}
MacTok: Robust Continuous Tokenization for Image Generation: Hengyu Zeng,

Xin Gao,

Guanghao Li,

Yuxiang Yan,

Jiaoyang Ruan,

Junpeng Ma,

Haoyu Albert Wang,

Jian Pu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Hengyu and Gao, Xin and Li, Guanghao and Yan, Yuxiang and Ruan, Jiaoyang and Ma, Junpeng and Wang, Haoyu Albert and Pu, Jian},
  title     = {{MacTok}: Robust Continuous Tokenization for Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43407--43417},
}
JRM: Joint Reconstruction Model for Multiple Objects without Alignment: Qirui Wu,

Yawar Siddiqui,

Duncan Frost,

Samir Aroudj,

Armen Avetisyan,

Richard Newcombe,

Angel X. Chang,

Jakob Engel,

Henry Howard-Jenkins; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Qirui and Siddiqui, Yawar and Frost, Duncan and Aroudj, Samir and Avetisyan, Armen and Newcombe, Richard and Chang, Angel X. and Engel, Jakob and Howard-Jenkins, Henry},
  title     = {{JRM}: Joint Reconstruction Model for Multiple Objects without Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {307--316},
}
CAPT: Confusion-Aware Prompt Tuning for Reducing Vision-Language Misalignment: Maoyuan Shao,

Yutong Gao,

Xinyang Huang,

Lijuan Sun,

Guoshun Nan,

Chuang Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Maoyuan and Gao, Yutong and Huang, Xinyang and Sun, Lijuan and Nan, Guoshun and Zhu, Chuang},
  title     = {{CAPT}: Confusion-Aware Prompt Tuning for Reducing Vision-Language Misalignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3154--3164},
}
MarkushGrapher-2: End-to-end Multimodal Recognition of Chemical Structures: Tim Strohmeyer,

Lucas Morin,

Gerhard Ingmar Meijer,

Valery Weber,

Ahmed Nassar,

Peter Staar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Strohmeyer_2026_CVPR,
  author    = {Strohmeyer, Tim and Morin, Lucas and Meijer, Gerhard Ingmar and Weber, Valery and Nassar, Ahmed and Staar, Peter},
  title     = {{MarkushGrapher-2}: End-to-end Multimodal Recognition of Chemical Structures},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31176--31185},
}
MOMO: Mars Orbital MOdel Foundation Model for Mars Orbital Applications: Mirali Purohit,

Bimal Gajera,

Irish Mehta,

Bhanu Tokas,

Jacob Adler,

Steven Lu,

Scott Dickenshied,

Serina Diniega,

Brian Bue,

Umaa Rebbapragada,

Hannah Kerner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Purohit_2026_CVPR,
  author    = {Purohit, Mirali and Gajera, Bimal and Mehta, Irish and Tokas, Bhanu and Adler, Jacob and Lu, Steven and Dickenshied, Scott and Diniega, Serina and Bue, Brian and Rebbapragada, Umaa and Kerner, Hannah},
  title     = {{MOMO}: {Mars} Orbital {MOdel} Foundation Model for {Mars} Orbital Applications},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27772--27782},
}
EG-3DVG: Expression and Geometry Aware Grounding Decoder for 3D Visual Grounding: GwangWook Park,

Hyo-Jun Lee,

Jong-Hyeon Baek,

Hanul Kim,

Yeong Jun Koh; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, GwangWook and Lee, Hyo-Jun and Baek, Jong-Hyeon and Kim, Hanul and Koh, Yeong Jun},
  title     = {{EG-3DVG}: Expression and Geometry Aware Grounding Decoder for {3D} Visual Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2625--2634},
}
LLaDA-V: Large Language Diffusion Models with Visual Instruction Tuning: Zebin You,

Shen Nie,

Xiaolu Zhang,

Jun Zhou,

Zhiwu Lu,

Ji-Rong Wen,

Chongxuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2026_CVPR,
  author    = {You, Zebin and Nie, Shen and Zhang, Xiaolu and Zhou, Jun and Lu, Zhiwu and Wen, Ji-Rong and Li, Chongxuan},
  title     = {{LLaDA-V}: Large Language Diffusion Models with Visual Instruction Tuning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10093--10105},
}
C-GenReg: Training-Free 3D Point Cloud Registration by Multi-View-Consistent Geometry-to-Image Generation with Probabilistic Modalities Fusion: Yuval Haitman,

Amit Efraim,

Joseph M. Francos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Haitman_2026_CVPR,
  author    = {Haitman, Yuval and Efraim, Amit and Francos, Joseph M.},
  title     = {{C-GenReg}: Training-Free {3D} Point Cloud Registration by Multi-View-Consistent Geometry-to-Image Generation with Probabilistic Modalities Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3004--3013},
}
Scene-VLM: Multimodal Video Scene Segmentation via Vision-Language Models: Nimrod Berman,

Adam Botach,

Emanuel Ben-Baruch,

Shunit Haviv Hakimi,

Asaf Gendler,

Ilan Naiman,

Erez Yosef,

Igor Kviatkovsky; [pdf] [supp]
[bibtex]
@InProceedings{Berman_2026_CVPR,
  author    = {Berman, Nimrod and Botach, Adam and Ben-Baruch, Emanuel and Hakimi, Shunit Haviv and Gendler, Asaf and Naiman, Ilan and Yosef, Erez and Kviatkovsky, Igor},
  title     = {{Scene-VLM}: Multimodal Video Scene Segmentation via Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39848--39857},
}
Synergistic Bleeding Region and Point Detection in Laparoscopic Surgical Videos: Jialun Pei,

Zhangjun Zhou,

Diandian Guo,

Zhixi Li,

Jing Qin,

Bo Du,

Pheng-Ann Heng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pei_2026_CVPR,
  author    = {Pei, Jialun and Zhou, Zhangjun and Guo, Diandian and Li, Zhixi and Qin, Jing and Du, Bo and Heng, Pheng-Ann},
  title     = {Synergistic Bleeding Region and Point Detection in Laparoscopic Surgical Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1396--1405},
}
CICA: Coupling Confidence-Aware Pretraining with Confidence-Informed Attention for Robust Multimodal Sentiment Analysis: Haoyu Jiang,

Xiaoliang Chen,

Duoqian Miao,

Xiaolin Qin,

Xianyong Li,

Yajun Du; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Haoyu and Chen, Xiaoliang and Miao, Duoqian and Qin, Xiaolin and Li, Xianyong and Du, Yajun},
  title     = {{CICA}: Coupling Confidence-Aware Pretraining with Confidence-Informed Attention for Robust Multimodal Sentiment Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37842--37851},
}
LiDAR Prompted Spatio-Temporal Multi-View Stereo for Autonomous Driving: Qihao Sun,

Jiarun Liu,

Ziqian Ni,

Jianyun Xu,

Sheng Yang,

Tao Xie,

Lijun Zhao,

Ruifeng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Qihao and Liu, Jiarun and Ni, Ziqian and Xu, Jianyun and Yang, Sheng and Xie, Tao and Zhao, Lijun and Li, Ruifeng},
  title     = {{LiDAR} Prompted Spatio-Temporal Multi-View Stereo for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14567--14577},
}
LongVideo-R1: Smart Navigation for Low-cost Long Video Understanding: Jihao Qiu,

Lingxi Xie,

Xinyue Huo,

Qi Tian,

Qixiang Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Jihao and Xie, Lingxi and Huo, Xinyue and Tian, Qi and Ye, Qixiang},
  title     = {{LongVideo-R1}: Smart Navigation for Low-cost Long Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40505--40515},
}
Zero-Shot Image Denoising via Hybrid Prior-Guided Pseudo Sample Generation: Xiaole Zhao,

Qingsong Pang,

Xiaobo Zhang,

Xun Xu,

Xun Gong,

Yan Yang,

Tianrui Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Xiaole and Pang, Qingsong and Zhang, Xiaobo and Xu, Xun and Gong, Xun and Yang, Yan and Li, Tianrui},
  title     = {Zero-Shot Image Denoising via Hybrid Prior-Guided Pseudo Sample Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22648--22657},
}
CoIn: Coverage and Informativeness-Guided Token Reduction for Efficient Large Multimodal Models: Chenxi Du,

Yongheng Deng,

Jiani Liu,

Yujia Zhang,

Xi Chen,

Ju Ren; [pdf] [supp]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Chenxi and Deng, Yongheng and Liu, Jiani and Zhang, Yujia and Chen, Xi and Ren, Ju},
  title     = {{CoIn}: Coverage and Informativeness-Guided Token Reduction for Efficient Large Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10492--10501},
}
EVA: Efficient Reinforcement Learning for End-to-End Video Agent: Yaolun Zhang,

Ruohui Wang,

Jiahao Wang,

Yepeng Tang,

Xuanyu Zheng,

Haonan Duan,

Hao Lu,

Hanming Deng,

Lewei Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yaolun and Wang, Ruohui and Wang, Jiahao and Tang, Yepeng and Zheng, Xuanyu and Duan, Haonan and Lu, Hao and Deng, Hanming and Lu, Lewei}, title = {{EVA}: Efficient Reinforcement Learning for End-to-End Video Agent}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12289--12299} }
HyCal: A Training-Free Prototype Calibration Method for Cross-Discipline Few-Shot Class-Incremental Learning: Eunju Lee,

MiHyeon Kim,

JuneHyoung Kwon,

Yoonji Lee,

JiHyun Kim,

Soojin Jang,

YoungBin Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR, author = {Lee, Eunju and Kim, MiHyeon and Kwon, JuneHyoung and Lee, Yoonji and Kim, JiHyun and Jang, Soojin and Kim, YoungBin}, title = {{HyCal}: A Training-Free Prototype Calibration Method for Cross-Discipline Few-Shot Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29462--29471} }
ADSeeker: A Knowledge-Grounded Reasoning Framework for Industry Anomaly Detection and Reasoning: Kai Zhang,

Zekai Zhang,

Xihe Sun,

Anpeng Wang,

Jingmeng Nie,

Qinghui Chen,

Han Hao,

Jianyuan Guo,

Jinglin Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Kai and Zhang, Zekai and Sun, Xihe and Wang, Anpeng and Nie, Jingmeng and Chen, Qinghui and Hao, Han and Guo, Jianyuan and Zhang, Jinglin}, title = {{ADSeeker}: A Knowledge-Grounded Reasoning Framework for Industry Anomaly Detection and Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {21379--21388} }
When Local Rules Create Global Order: Self-Organized Representation Learning for Latent Diffusion Models: Junrong Lian,

Weijian Deng,

Pengxu Wei,

Yaqin Chen,

Qixiang Ye,

Liang Lin; [pdf] [supp]
[bibtex]
@InProceedings{Lian_2026_CVPR, author = {Lian, Junrong and Deng, Weijian and Wei, Pengxu and Chen, Yaqin and Ye, Qixiang and Lin, Liang}, title = {When Local Rules Create Global Order: Self-Organized Representation Learning for Latent Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9445--9454} }
HDR-VLM: HDR-Domain Adaptation of VLMs and Preference-Aligned Quality Assessment for HDR Video Color Grading: Hao Yuan,

Jiabin Zhang,

Yajing Wu,

Ruixuan Pang,

Jing Li; [pdf] [supp]
[bibtex]
@InProceedings{Yuan_2026_CVPR, author = {Yuan, Hao and Zhang, Jiabin and Wu, Yajing and Pang, Ruixuan and Li, Jing}, title = {{HDR-VLM}: {HDR-Domain} Adaptation of {VLMs} and Preference-Aligned Quality Assessment for {HDR} Video Color Grading}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40792--40801} }
MeshFlow: Efficient Artistic Mesh Generation via MeshVAE and Flow-based Diffusion Transformer: Weiyu Li,

Antoine Toisoul,

Tom Monnier,

Roman Shapovalov,

Rakesh Ranjan,

Ping Tan,

Andrea Vedaldi; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Weiyu and Toisoul, Antoine and Monnier, Tom and Shapovalov, Roman and Ranjan, Rakesh and Tan, Ping and Vedaldi, Andrea}, title = {{MeshFlow}: Efficient Artistic Mesh Generation via {MeshVAE} and Flow-based Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5849--5858} }
ReMoRa: Multimodal Large Language Model based on Refined Motion Representation for Long-Video Understanding: Daichi Yashima,

Shuhei Kurita,

Yusuke Oda,

Komei Sugiura; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yashima_2026_CVPR, author = {Yashima, Daichi and Kurita, Shuhei and Oda, Yusuke and Sugiura, Komei}, title = {{ReMoRa}: Multimodal Large Language Model based on Refined Motion Representation for Long-Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31845--31855} }
Seeing without Pixels: Perception from Camera Trajectories: Zihui Xue,

Kristen Grauman,

Dima Damen,

Andrew Zisserman,

Tengda Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR, author = {Xue, Zihui and Grauman, Kristen and Damen, Dima and Zisserman, Andrew and Han, Tengda}, title = {Seeing without Pixels: Perception from Camera Trajectories}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38836--38847} }
UniLS: End-to-End Audio-Driven Avatars for Unified Listening and Speaking: Xuangeng Chu,

Ruicong Liu,

Yifei Huang,

Yun Liu,

Yichen Peng,

Bo Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chu_2026_CVPR, author = {Chu, Xuangeng and Liu, Ruicong and Huang, Yifei and Liu, Yun and Peng, Yichen and Zheng, Bo}, title = {{UniLS}: End-to-End Audio-Driven Avatars for Unified Listening and Speaking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {25142--25152} }
CineScene: Implicit 3D as Effective Scene Representation for Cinematic Video Generation: Kaiyi Huang,

Yukun Huang,

Yu Li,

Jianhong Bai,

Xintao Wang,

Zinan Lin,

Xuefei Ning,

Jiwen Yu,

Yu Wang,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR, author = {Huang, Kaiyi and Huang, Yukun and Li, Yu and Bai, Jianhong and Wang, Xintao and Lin, Zinan and Ning, Xuefei and Yu, Jiwen and Wang, Yu and Liu, Xihui}, title = {{CineScene}: Implicit {3D} as Effective Scene Representation for Cinematic Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {25381--25392} }
NIL: No-data Imitation Learning: Mert Albaba,

Chenhao Li,

Markos Diomataris,

Omid Taheri,

Andreas Krause,

Michael J. Black; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Albaba_2026_CVPR, author = {Albaba, Mert and Li, Chenhao and Diomataris, Markos and Taheri, Omid and Krause, Andreas and Black, Michael J.}, title = {{NIL}: No-data Imitation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {20823--20833} }
CrackSSM: Reviving SSMs for Crack Segmentation via Dynamic Scanning: Yubin Gu,

Boyang Hou,

Yuan Meng,

Wenting Luo,

Jiayi Ji,

Xiaoshuai Sun; [pdf]
[bibtex]
@InProceedings{Gu_2026_CVPR, author = {Gu, Yubin and Hou, Boyang and Meng, Yuan and Luo, Wenting and Ji, Jiayi and Sun, Xiaoshuai}, title = {{CrackSSM}: Reviving {SSMs} for Crack Segmentation via Dynamic Scanning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10721--10730} }
A Unified Framework for Knowledge Transfer in Bidirectional Model Scaling: Jianlu Shen,

Fu Feng,

Jiaze Xu,

Yucheng Xie,

Jiaqi Lv,

Xin Geng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR, author = {Shen, Jianlu and Feng, Fu and Xu, Jiaze and Xie, Yucheng and Lv, Jiaqi and Geng, Xin}, title = {A Unified Framework for Knowledge Transfer in Bidirectional Model Scaling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {34535--34545} }
Humanoid Generative Pre-Training for Zero-Shot Motion Tracking: Zekun Qi,

Xuchuan Chen,

Jilong Wang,

Chenghuai Lin,

Yunrui Lian,

Wenyao Zhang,

Xinqiang Yu,

He Wang,

Li Yi; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2026_CVPR, author = {Qi, Zekun and Chen, Xuchuan and Wang, Jilong and Lin, Chenghuai and Lian, Yunrui and Zhang, Wenyao and Yu, Xinqiang and Wang, He and Yi, Li}, title = {Humanoid Generative Pre-Training for Zero-Shot Motion Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {20834--20844} }
MoECLIP: Patch-Specialized Experts for Zero-shot Anomaly Detection: Jun Yeong Park,

JunYoung Seo,

Minji Kang,

Yu Rang Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR, author = {Park, Jun Yeong and Seo, JunYoung and Kang, Minji and Park, Yu Rang}, title = {{MoECLIP}: Patch-Specialized Experts for Zero-shot Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35534--35544} }
SAMIX: Reinforcing SAM2 with Semantic Adapter and Reference Selecting Policy for Mix-Supervised Segmentation: Qiang Hu,

Jiajie Wei,

Zhenyu Yi,

Zhifen Yan,

Yingjie Guo,

Hongkuan Shi,

Ge-Peng Ji,

Qiang Li,

Zhiwei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR, author = {Hu, Qiang and Wei, Jiajie and Yi, Zhenyu and Yan, Zhifen and Guo, Yingjie and Shi, Hongkuan and Ji, Ge-Peng and Li, Qiang and Wang, Zhiwei}, title = {{SAMIX}: Reinforcing {SAM2} with Semantic Adapter and Reference Selecting Policy for Mix-Supervised Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {17948--17958} }
Rectifying Latent Space for Generative Single-Image Reflection Removal: Mingjia Li,

Jin Hu,

Hainuo Wang,

Qiming Hu,

Jiarui Wang,

Xiaojie Guo; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Mingjia and Hu, Jin and Wang, Hainuo and Hu, Qiming and Wang, Jiarui and Guo, Xiaojie}, title = {Rectifying Latent Space for Generative Single-Image Reflection Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8397--8407} }
Unstitching the Chimera: Frame-Level Risk and Train-Free Mitigation for Video Hallucination: Songyuan Yang,

Guijian Tang,

Kun Hu,

Haotian Wang,

Shixuan Liu,

Wenjing Yang,

Long Lan,

Huibin Tan; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Songyuan and Tang, Guijian and Hu, Kun and Wang, Haotian and Liu, Shixuan and Yang, Wenjing and Lan, Long and Tan, Huibin}, title = {Unstitching the Chimera: Frame-Level Risk and Train-Free Mitigation for Video Hallucination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4188--4198} }
Refracting Reality: Generating Images with Realistic Transparent Objects: Yue Yin,

Enze Tao,

Dylan Campbell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR, author = {Yin, Yue and Tao, Enze and Campbell, Dylan}, title = {Refracting Reality: Generating Images with Realistic Transparent Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4312--4321} }
SLARM: Streaming and Language-Aligned Reconstruction Model for Dynamic Scenes: Zhicheng Qiu,

Jiarui Meng,

Tong-an Luo,

Yican Huang,

Xuan Feng,

Xuanfu Li,

Zhan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2026_CVPR, author = {Qiu, Zhicheng and Meng, Jiarui and Luo, Tong-an and Huang, Yican and Feng, Xuan and Li, Xuanfu and Xu, Zhan}, title = {{SLARM}: Streaming and Language-Aligned Reconstruction Model for Dynamic Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29023--29034} }
Stochastic Ray Tracing for the Reconstruction of 3D Gaussian Splatting: Peiyu Xu,

Shuang Zhao,

Xin Sun,

Krishna Mullia,

Raymond Fei,

Iliyan Georgiev; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Peiyu and Zhao, Shuang and Sun, Xin and Mullia, Krishna and Fei, Raymond and Georgiev, Iliyan}, title = {Stochastic Ray Tracing for the Reconstruction of {3D} {Gaussian} Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {19001--19010} }
Uni-Encoder Meets Multi-Encoders: Representation Before Fusion for Brain Tumor Segmentation with Missing Modalities: Peibo Song,

Xiaotian Xue,

Jinshuo Zhang,

Zihao Wang,

Jinhua Liu,

Shujun Fu,

Fangxun Bao,

Si Yong Yeo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR, author = {Song, Peibo and Xue, Xiaotian and Zhang, Jinshuo and Wang, Zihao and Liu, Jinhua and Fu, Shujun and Bao, Fangxun and Yeo, Si Yong}, title = {Uni-Encoder Meets Multi-Encoders: Representation Before Fusion for Brain Tumor Segmentation with Missing Modalities}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {15627--15638} }
Training-free Mixed-Resolution Latent Upsampling for Spatially Accelerated Diffusion Transformers: Wongi Jeong,

Kyungryeol Lee,

Hoigi Seo,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR, author = {Jeong, Wongi and Lee, Kyungryeol and Seo, Hoigi and Chun, Se Young}, title = {Training-free Mixed-Resolution Latent Upsampling for Spatially Accelerated Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {18673--18682} }
PersonaLive! Expressive Portrait Image Animation for Live Streaming: Zhiyuan Li,

Chi-Man Pun,

Chen Fang,

Jue Wang,

Xiaodong Cun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Zhiyuan and Pun, Chi-Man and Fang, Chen and Wang, Jue and Cun, Xiaodong}, title = {{PersonaLive}! Expressive Portrait Image Animation for Live Streaming}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {18118--18128} }
ProM3E: Probabilistic Masked MultiModal Embedding Model for Ecology: Srikumar Sastry,

Subash Khanal,

Aayush Dhakal,

Jiayu Lin,

Dan Cher,

Phoenix Jarosz,

Nathan Jacobs; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sastry_2026_CVPR, author = {Sastry, Srikumar and Khanal, Subash and Dhakal, Aayush and Lin, Jiayu and Cher, Dan and Jarosz, Phoenix and Jacobs, Nathan}, title = {{ProM3E}: Probabilistic Masked MultiModal Embedding Model for Ecology}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {20564--20574} }
Unified Camera Positional Encoding for Controlled Video Generation: Cheng Zhang,

Boying Li,

Meng Wei,

Yan-Pei Cao,

Camilo Gambardella,

Dinh Phung,

Jianfei Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Cheng and Li, Boying and Wei, Meng and Cao, Yan-Pei and Gambardella, Camilo and Phung, Dinh and Cai, Jianfei}, title = {Unified Camera Positional Encoding for Controlled Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38027--38037} }
Toward Low-Cost yet Effective Temporal Learning for UAV Tracking: Chaocan Xue,

Qihua Liang,

Bineng Zhong,

Yanting Zu,

Yuanliang Xue,

Haiying Xia,

Shuxiang Song; [pdf]
[bibtex]
@InProceedings{Xue_2026_CVPR, author = {Xue, Chaocan and Liang, Qihua and Zhong, Bineng and Zu, Yanting and Xue, Yuanliang and Xia, Haiying and Song, Shuxiang}, title = {Toward Low-Cost yet Effective Temporal Learning for {UAV} Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42538--42548} }
Refining Few-Step Text-to-Multiview Diffusion via Reinforcement Learning: Ziyi Zhang,

Li Shen,

Deheng Ye,

Yong Luo,

Huangxuan Zhao,

Meng Liu,

Wei Yu,

Lefei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Ziyi and Shen, Li and Ye, Deheng and Luo, Yong and Zhao, Huangxuan and Liu, Meng and Yu, Wei and Zhang, Lefei}, title = {Refining Few-Step Text-to-Multiview Diffusion via Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2401--2411} }
Rethinking Intermediate Representation for VLM-based Robot Manipulation: Weiliang Tang,

Jialin Gao,

Jia-Hui Pan,

Gang Wang,

Li Erran Li,

Yun-Hui Liu,

Mingyu Ding,

Pheng-Ann Heng,

Chi-Wing Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR, author = {Tang, Weiliang and Gao, Jialin and Pan, Jia-Hui and Wang, Gang and Li, Li Erran and Liu, Yun-Hui and Ding, Mingyu and Heng, Pheng-Ann and Fu, Chi-Wing}, title = {Rethinking Intermediate Representation for {VLM-based} Robot Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29652--29662} }
Learning Diffeomorphism for Medical Image Registration with Time-Embedded Architectures Using Semigroup Regularization: Mohammadjavad Matinkia,

Nilanjan Ray; [pdf] [supp]
[bibtex]
@InProceedings{Matinkia_2026_CVPR, author = {Matinkia, Mohammadjavad and Ray, Nilanjan}, title = {Learning Diffeomorphism for Medical Image Registration with Time-Embedded Architectures Using Semigroup Regularization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28775--28785} }
FUN REC * Reconstructing Functional 3D Scenes from Egocentric Interaction Videos: Alexandros Delitzas,

Chenyangguang Zhang,

Alexey Gavryushin,

Tommaso Di Mario,

Boyang Sun,

Rishabh Dabral,

Leonidas Guibas,

Christian Theobalt,

Marc Pollefeys,

Francis Engelmann,

Daniel Barath; [pdf] [supp]
[bibtex]
@InProceedings{Delitzas_2026_CVPR, author = {Delitzas, Alexandros and Zhang, Chenyangguang and Gavryushin, Alexey and Di Mario, Tommaso and Sun, Boyang and Dabral, Rishabh and Guibas, Leonidas and Theobalt, Christian and Pollefeys, Marc and Engelmann, Francis and Barath, Daniel}, title = {{FUN REC} * Reconstructing Functional {3D} Scenes from Egocentric Interaction Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28446--28457} }
Tri-Modal Fusion Transformers for UAV-based Object Detection: Craig Iaboni,

Pramod Abichandani; [pdf] [arXiv]
[bibtex]
@InProceedings{Iaboni_2026_CVPR, author = {Iaboni, Craig and Abichandani, Pramod}, title = {Tri-Modal Fusion Transformers for {UAV-based} Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4373--4382} }
RetFormer: Multimodal Retrieval for Enhancing Image Recognition: Tianrui Yu,

Xiubo Liang,

Hongzhi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR, author = {Yu, Tianrui and Liang, Xiubo and Wang, Hongzhi}, title = {{RetFormer}: Multimodal Retrieval for Enhancing Image Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2704--2714} }
Revisiting Unknowns: Towards Effective and Efficient Open-Set Active Learning: Chen-Chen Zong,

Yu-Qi Chi,

Xie-Yang Wang,

Yan Cui,

Sheng-Jun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zong_2026_CVPR, author = {Zong, Chen-Chen and Chi, Yu-Qi and Wang, Xie-Yang and Cui, Yan and Huang, Sheng-Jun}, title = {Revisiting Unknowns: Towards Effective and Efficient Open-Set Active Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {17756--17765} }
Driving on Registers: Ellington Kirby,

Alexandre Boulch,

Yihong Xu,

Yuan Yin,

Gilles Puy,

Éloi Zablocki,

Andrei Bursuc,

Spyros Gidaris,

Renaud Marlet,

Florent Bartoccioni,

Anh-Quan Cao,

Nermin Samet,

Tuan-Hung VU,

Matthieu Cord; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kirby_2026_CVPR, author = {Kirby, Ellington and Boulch, Alexandre and Xu, Yihong and Yin, Yuan and Puy, Gilles and Zablocki, {\'E}loi and Bursuc, Andrei and Gidaris, Spyros and Marlet, Renaud and Bartoccioni, Florent and Cao, Anh-Quan and Samet, Nermin and VU, Tuan-Hung and Cord, Matthieu}, title = {Driving on Registers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32058--32069} }
MoE-GRPO: Optimizing Mixture-of-Experts via Reinforcement Learning in Vision-Language Models: Dohwan Ko,

Jinyoung Park,

Seoung Choi,

Sanghyeok Lee,

Seohyun Lee,

Hyunwoo J. Kim; [pdf] [arXiv]
[bibtex]
@InProceedings{Ko_2026_CVPR, author = {Ko, Dohwan and Park, Jinyoung and Choi, Seoung and Lee, Sanghyeok and Lee, Seohyun and Kim, Hyunwoo J.}, title = {{MoE-GRPO}: Optimizing Mixture-of-Experts via Reinforcement Learning in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {14957--14967} }
Sparse Task Vector Mixup with Hypernetworks for Efficient Knowledge Transfer in Whole-Slide Image Prognosis: Pei Liu,

Xiangxiang Zeng,

Tengfei Ma,

Yucheng Xing,

Xuanbai Ren,

Yiping Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Pei and Zeng, Xiangxiang and Ma, Tengfei and Xing, Yucheng and Ren, Xuanbai and Liu, Yiping}, title = {Sparse Task Vector Mixup with Hypernetworks for Efficient Knowledge Transfer in Whole-Slide Image Prognosis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35238--35247} }
TokenTrace: Multi-Concept Attribution through Watermarked Token Recovery: Li Zhang,

Shruti Agarwal,

John Collomosse,

Pengtao Xie,

Vishal Asnani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Li and Agarwal, Shruti and Collomosse, John and Xie, Pengtao and Asnani, Vishal}, title = {{TokenTrace}: Multi-Concept Attribution through Watermarked Token Recovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42942--42952} }
Twin-T & TwintVQA: A Reliable Structure-Detail Separating VLM and a Comprehensive Benchmark for Chart and Table Tasks: Jiahua Bao,

Siyao Cheng,

Jiaxing Du,

Qingtao Xia,

Changjiang He,

Zeming Lang,

Jie Liu; [pdf] [supp]
[bibtex]
@InProceedings{Bao_2026_CVPR, author = {Bao, Jiahua and Cheng, Siyao and Du, Jiaxing and Xia, Qingtao and He, Changjiang and Lang, Zeming and Liu, Jie}, title = {{Twin-T} \& {TwintVQA}: A Reliable Structure-Detail Separating {VLM} and a Comprehensive Benchmark for Chart and Table Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4850--4859} }
HTNav: A Hybrid Navigation Framework with Tiered Structure for Urban Aerial Vision-and-Language Navigation: Chengjie Fan,

Cong Pan,

Zijian Liu,

Ningzhong Liu,

Jie Qin; [pdf] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR, author = {Fan, Chengjie and Pan, Cong and Liu, Zijian and Liu, Ningzhong and Qin, Jie}, title = {{HTNav}: A Hybrid Navigation Framework with Tiered Structure for Urban Aerial Vision-and-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10976--10985} }
NanoSD: Edge Efficient Foundation Model for Real Time Image Restoration: Subhajit Sanyal,

Srinivas Soumitri Miriyala,

Akshay Janardan Bankar,

Manjunath Arveti,

Sowmya Vajrala,

Shreyas Pandith,

Sravanth Kodavanti,

Abhishek Ameta,

Harshit Harshit,

Amit Satish Unde; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sanyal_2026_CVPR, author = {Sanyal, Subhajit and Miriyala, Srinivas Soumitri and Bankar, Akshay Janardan and Arveti, Manjunath and Vajrala, Sowmya and Pandith, Shreyas and Kodavanti, Sravanth and Ameta, Abhishek and Harshit, Harshit and Unde, Amit Satish}, title = {{NanoSD}: Edge Efficient Foundation Model for Real Time Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8449--8459} }
MAD: Modality-Adaptive Decoding for Mitigating Cross-Modal Hallucinations in Multimodal Large Language Models: Sangyun Chung,

Se Yeon Kim,

Youngchae Chee,

Yong Man Ro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chung_2026_CVPR, author = {Chung, Sangyun and Kim, Se Yeon and Chee, Youngchae and Ro, Yong Man}, title = {{MAD}: Modality-Adaptive Decoding for Mitigating Cross-Modal Hallucinations in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40175--40185} }
MMR-AD: A Large-Scale Multimodal Dataset for Benchmarking General Anomaly Detection with Multimodal Large Language Models: Xincheng Yao,

Zefeng Qian,

Chao Shi,

Jiayang Song,

Chongyang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2026_CVPR, author = {Yao, Xincheng and Qian, Zefeng and Shi, Chao and Song, Jiayang and Zhang, Chongyang}, title = {{MMR-AD}: A Large-Scale Multimodal Dataset for Benchmarking General Anomaly Detection with Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43072--43082} }
Solving Minimal Problems Without Matrix Inversion Using FFT-Based Interpolation: Haidong Wu,

Snehal Bhayani,

Janne Heikkila; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Haidong and Bhayani, Snehal and Heikkila, Janne}, title = {Solving Minimal Problems Without Matrix Inversion Using {FFT-Based} Interpolation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {19771--19780} }
Teacher-Guided Routing for Sparse Vision Mixture-of-Experts: Masahiro Kada,

Ryota Yoshihashi,

Satoshi Ikehata,

Rei Kawakami,

Ikuro Sato; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kada_2026_CVPR, author = {Kada, Masahiro and Yoshihashi, Ryota and Ikehata, Satoshi and Kawakami, Rei and Sato, Ikuro}, title = {Teacher-Guided Routing for Sparse Vision Mixture-of-Experts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6198--6208} }
DeltaQuant: 4-bit Video Diffusion Models with Spatiotemporal Delta Smoothing: Xingyang Li,

Samuel Tesfai,

Zhekai Zhang,

Haocheng Xi,

Shuo Yang,

Lvmin Zhang,

Yufei Sun,

Kelly Peng,

Maneesh Agrawala,

Ion Stoica,

Kurt Keutzer,

Jun-Yan Zhu,

Song Han,

Yujun Lin,

Muyang Li; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Xingyang and Tesfai, Samuel and Zhang, Zhekai and Xi, Haocheng and Yang, Shuo and Zhang, Lvmin and Sun, Yufei and Peng, Kelly and Agrawala, Maneesh and Stoica, Ion and Keutzer, Kurt and Zhu, Jun-Yan and Han, Song and Lin, Yujun and Li, Muyang}, title = {{DeltaQuant}: 4-bit Video Diffusion Models with Spatiotemporal Delta Smoothing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43578--43588} }
AstraNav-Memory: Contexts Compression for Long Memory: Junjun Hu,

Xinda Xue,

Botao Ren,

Minghua Luo,

Jintao Chen,

Haochen Bai,

Liangliang You,

Mu Xu; [pdf] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR, author = {Hu, Junjun and Xue, Xinda and Ren, Botao and Luo, Minghua and Chen, Jintao and Bai, Haochen and You, Liangliang and Xu, Mu}, title = {{AstraNav-Memory}: Contexts Compression for Long Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8097--8109} }
Improving Motion in Image-to-Video Models via Adaptive Low-Pass Guidance: June Suk Choi,

Kyungmin Lee,

Sihyun Yu,

Yisol Choi,

Jinwoo Shin,

Kimin Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR, author = {Choi, June Suk and Lee, Kyungmin and Yu, Sihyun and Choi, Yisol and Shin, Jinwoo and Lee, Kimin}, title = {Improving Motion in Image-to-Video Models via Adaptive Low-Pass Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40364--40375} }
Parameter-Efficient Semantic Augmentation for Enhancing Open-Vocabulary Object Detection: Weihao Cao,

Runqi Wang,

Xiaoyue Duan,

Jinchao Zhang,

Ang Yang,

Liping Jing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR, author = {Cao, Weihao and Wang, Runqi and Duan, Xiaoyue and Zhang, Jinchao and Yang, Ang and Jing, Liping}, title = {Parameter-Efficient Semantic Augmentation for Enhancing Open-Vocabulary Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {20367--20376} }
Globally Optimal Pose from Orthographic Silhouettes: Agniva Sengupta,

Dilara Kus,

Jianning Li,

Stefan Zachow; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sengupta_2026_CVPR, author = {Sengupta, Agniva and Kus, Dilara and Li, Jianning and Zachow, Stefan}, title = {Globally Optimal Pose from Orthographic Silhouettes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11029--11038} }
Otil: Accelerating Diffusion Model Inference via Communication-Efficient Multi-GPU Parallelism: Xin Li,

Shujun Tian,

Tao Lu,

Han Bao,

Zonghui Wang,

Wenzhi Chen; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Xin and Tian, Shujun and Lu, Tao and Bao, Han and Wang, Zonghui and Chen, Wenzhi}, title = {{Otil}: Accelerating Diffusion Model Inference via Communication-Efficient {Multi-GPU} Parallelism}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38487--38497} }
Computation and Communication Efficient Federated Unlearning via On-server Gradient Conflict Mitigation and Expression: Minh-Duong Nguyen,

Senura Wanasekara,

Le-Tuan Nguyen,

Quoc-Viet Pham,

Ken-Tye Yong,

Nguyen H. Tran,

Dung D. Le; [pdf] [supp]
[bibtex]
@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Minh-Duong and Wanasekara, Senura and Nguyen, Le-Tuan and Pham, Quoc-Viet and Yong, Ken-Tye and Tran, Nguyen H. and Le, Dung D.}, title = {Computation and Communication Efficient Federated Unlearning via On-server Gradient Conflict Mitigation and Expression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3347--3357} }
RegionFuse: Region-Adaptive Pixel Distribution Learning for Infrared and Visible Image Fusion: Jianghan Xia,

Hong Song,

Jinfu Li,

Yucong Lin,

Shihan Ma,

Jingfan Fan,

Danni Ai,

Tianyu Fu,

Deqiang Xiao,

Jian Yang; [pdf] [supp]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Jianghan and Song, Hong and Li, Jinfu and Lin, Yucong and Ma, Shihan and Fan, Jingfan and Ai, Danni and Fu, Tianyu and Xiao, Deqiang and Yang, Jian},
  title     = {{RegionFuse}: Region-Adaptive Pixel Distribution Learning for Infrared and Visible Image Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19539--19548},
}
AnomalyVFM -- Transforming Vision Foundation Models into Zero-Shot Anomaly Detectors: Matic Fučka,

Vitjan Zavrtanik,

Danijel Skočaj; [pdf] [supp]
[bibtex]
@InProceedings{Fucka_2026_CVPR,
  author    = {Fu{\v{c}}ka, Matic and Zavrtanik, Vitjan and Sko{\v{c}}aj, Danijel},
  title     = {{AnomalyVFM} -- Transforming Vision Foundation Models into Zero-Shot Anomaly Detectors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35555--35566},
}
VDE: Training-Free Accelerating Rectified Flow Model via Velocity Decomposition and Estimation: Junwen Tan,

Jinglin Liang,

Hongyuan Chen,

Shuangping Huang; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Junwen and Liang, Jinglin and Chen, Hongyuan and Huang, Shuangping},
  title     = {{VDE}: Training-Free Accelerating Rectified Flow Model via Velocity Decomposition and Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37918--37928},
}
TreeTeaming: Autonomous Red-Teaming of Vision-Language Models via Hierarchical Strategy Exploration: Chunxiao Li,

Lijun Li,

Jing Shao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Chunxiao and Li, Lijun and Shao, Jing},
  title     = {{TreeTeaming}: Autonomous Red-Teaming of Vision-Language Models via Hierarchical Strategy Exploration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37684--37693},
}
Task-Driven Implicit Representations for Automated Design of LiDAR Systems: Nikhil Behari,

Aaron Young,

Tzofi Klinghoffer,

Akshat Dave,

Ramesh Raskar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Behari_2026_CVPR,
  author    = {Behari, Nikhil and Young, Aaron and Klinghoffer, Tzofi and Dave, Akshat and Raskar, Ramesh},
  title     = {Task-Driven Implicit Representations for Automated Design of {LiDAR} Systems},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24248--24257},
}
Mind the Gap: Transferring Labels to Align Object Detection Datasets: Mikhail Kennerley,

Angelica I. Aviles-Rivero,

Carola-Bibiane Schönlieb,

Robby T. Tan; [pdf] [supp]
[bibtex]
@InProceedings{Kennerley_2026_CVPR,
  author    = {Kennerley, Mikhail and Aviles-Rivero, Angelica I. and Sch{\"o}nlieb, Carola-Bibiane and Tan, Robby T.},
  title     = {Mind the Gap: Transferring Labels to Align Object Detection Datasets},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4353--4362},
}
Decoupled Generative Modeling for Human-Object Interaction Synthesis: Hwanhee Jung,

Seunggwan Lee,

Jeongyoon Yoon,

SeungHyeon Kim,

Giljoo Nam,

Qixing Huang,

Sangpil Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Hwanhee and Lee, Seunggwan and Yoon, Jeongyoon and Kim, SeungHyeon and Nam, Giljoo and Huang, Qixing and Kim, Sangpil},
  title     = {Decoupled Generative Modeling for Human-Object Interaction Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2253--2263},
}
Learning Latent Concepts for Detecting Out-of-Distribution Objects: Ting Peng,

Junhao Dong,

Yew-Soon Ong; [pdf] [supp]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Ting and Dong, Junhao and Ong, Yew-Soon},
  title     = {Learning Latent Concepts for Detecting Out-of-Distribution Objects},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28723--28733},
}
Proxy3D: Efficient 3D Representations for Vision-Language Models via Semantic Clustering and Alignment: Jerry Jiang,

Haowen Sun,

Denis Gudovskiy,

Yohei Nakata,

Tomoyuki Okuno,

Kurt Keutzer,

Wenzhao Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Jerry and Sun, Haowen and Gudovskiy, Denis and Nakata, Yohei and Okuno, Tomoyuki and Keutzer, Kurt and Zheng, Wenzhao},
  title     = {{Proxy3D}: Efficient {3D} Representations for Vision-Language Models via Semantic Clustering and Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23816--23825},
}
HiCoGen: Hierarchical Compositional Text-to-Image Generation in Diffusion Models via Reinforcement Learning: Hongji Yang,

Yucheng Zhou,

Wencheng Han,

Runzhou Tao,

Zhongying Qiu,

Jianfei Yang,

Jianbing Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Hongji and Zhou, Yucheng and Han, Wencheng and Tao, Runzhou and Qiu, Zhongying and Yang, Jianfei and Shen, Jianbing},
  title     = {{HiCoGen}: Hierarchical Compositional Text-to-Image Generation in Diffusion Models via Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36793--36802},
}
SpatialReward: Verifiable Spatial Reward Modeling for Fine-Grained Spatial Consistency in Text-to-Image Generation: Sashuai Zhou,

Qiang Zhou,

Junpeng Ma,

Yue Cao,

Ruofan Hu,

Ziang Zhang,

Xiaoda Yang,

Zhibin Wang,

Jun Song,

Cheng Yu,

Bo Zheng,

Zhou Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Sashuai and Zhou, Qiang and Ma, Junpeng and Cao, Yue and Hu, Ruofan and Zhang, Ziang and Yang, Xiaoda and Wang, Zhibin and Song, Jun and Yu, Cheng and Zheng, Bo and Zhao, Zhou},
  title     = {{SpatialReward}: Verifiable Spatial Reward Modeling for Fine-Grained Spatial Consistency in Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {647--658},
}
Feed-Forward One-Shot Animatable Textured Mesh Avatar Reconstruction: Yisheng He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yisheng},
  title     = {Feed-Forward One-Shot Animatable Textured Mesh Avatar Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4145--4156},
}
QUANTIPHY: A Quantitative Benchmark Evaluating Physical Reasoning Abilities of Vision-Language Models: Li Puyin,

Tiange Xiang,

Ella Mao,

Shirley Wei,

Xinye Chen,

Adnan Masood,

Li Fei-Fei,

Ehsan Adeli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Puyin_2026_CVPR,
  author    = {Li, Puyin and Xiang, Tiange and Mao, Ella and Wei, Shirley and Chen, Xinye and Masood, Adnan and Li, Fei-Fei and Adeli, Ehsan},
  title     = {{QUANTIPHY}: A Quantitative Benchmark Evaluating Physical Reasoning Abilities of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33174--33184},
}
From None to All: Self-Supervised 3D Reconstruction via Novel View Synthesis: Ranran Huang,

Weixun Luo,

Ye Mao,

Krystian Mikolajczyk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Ranran and Luo, Weixun and Mao, Ye and Mikolajczyk, Krystian},
  title     = {From None to All: Self-Supervised {3D} Reconstruction via Novel View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37358--37369},
}
Tokenization Allows Multimodal Large Language Models to Understand, Generate and Edit Architectural Floor Plans: Sizhong Qin,

Ramon Elias Weber,

Xinzheng Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Sizhong and Weber, Ramon Elias and Lu, Xinzheng},
  title     = {Tokenization Allows Multimodal Large Language Models to Understand, Generate and Edit Architectural Floor Plans},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10430--10440},
}
RetimeGS: Continuous-Time Reconstruction of 4D Gaussian Splatting: Xuezhen Wang,

Li Ma,

Yulin Shen,

Zeyu Wang,

Pedro V. Sander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xuezhen and Ma, Li and Shen, Yulin and Wang, Zeyu and Sander, Pedro V.},
  title     = {{RetimeGS}: Continuous-Time Reconstruction of {4D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7340--7350},
}
MV3DIS: Multi-View Mask Matching via 3D Guides for Zero-Shot 3D Instance Segmentation: Yibo Zhao,

Yigong Zhang,

Jin Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yibo and Zhang, Yigong and Xie, Jin},
  title     = {{MV3DIS}: Multi-View Mask Matching via {3D} Guides for Zero-Shot {3D} Instance Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17916--17926},
}
InvAD: Inversion-based Reconstruction-Free Anomaly Detection with Diffusion Models: Shunsuke Sakai,

Xiangteng He,

Chunzhi Gu,

Leonid Sigal,

Tatsuhito Hasegawa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sakai_2026_CVPR,
  author    = {Sakai, Shunsuke and He, Xiangteng and Gu, Chunzhi and Sigal, Leonid and Hasegawa, Tatsuhito},
  title     = {{InvAD}: Inversion-based Reconstruction-Free Anomaly Detection with Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21389--21398},
}
PHASE-Net: Physics-Grounded Harmonic Attention System for Efficient Remote Photoplethysmography Measurement: Bo Zhao,

Dan Guo,

Junzhe Cao,

Yong Xu,

Bochao Zou,

Tao Tan,

Yue Sun,

Zitong Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Bo and Guo, Dan and Cao, Junzhe and Xu, Yong and Zou, Bochao and Tan, Tao and Sun, Yue and Yu, Zitong},
  title     = {{PHASE-Net}: Physics-Grounded Harmonic Attention System for Efficient Remote Photoplethysmography Measurement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21198--21207},
}
Trainable Log-linear Sparse Attention for Efficient Diffusion Transformers: Yifan Zhou,

Zeqi Xiao,

Tianyi Wei,

Shuai Yang,

Xingang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Yifan and Xiao, Zeqi and Wei, Tianyi and Yang, Shuai and Pan, Xingang},
  title     = {Trainable Log-linear Sparse Attention for Efficient Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9424--9433},
}
PLACID: Identity-Preserving Multi-Object Compositing via Video Diffusion with Synthetic Trajectories: Gemma Canet Tarrés,

Manel Baradad,

Francesc Moreno-Noguer,

Yumeng Li; [pdf] [supp]
[bibtex]
@InProceedings{Tarres_2026_CVPR,
  author    = {Canet Tarr{\'e}s, Gemma and Baradad, Manel and Moreno-Noguer, Francesc and Li, Yumeng},
  title     = {{PLACID}: Identity-Preserving Multi-Object Compositing via Video Diffusion with Synthetic Trajectories},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38060--38070},
}
OmniZip: Audio-Guided Dynamic Token Compression for Fast Omnimodal Large Language Models: Keda Tao,

Kele Shao,

Bohan Yu,

Weiqiang Wang,

Jian Liu,

Huan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2026_CVPR,
  author    = {Tao, Keda and Shao, Kele and Yu, Bohan and Wang, Weiqiang and Liu, Jian and Wang, Huan},
  title     = {{OmniZip}: Audio-Guided Dynamic Token Compression for Fast Omnimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17682--17692},
}
Think with 3D: Geometric Imagination Grounded Spatial Reasoning from Limited Views: Zhangquan Chen,

Manyuan Zhang,

Xinlei Yu,

Xufang Luo,

Mingze Sun,

Zihao Pan,

Xiang An,

Yan Feng,

Peng Pei,

Xunliang Cai,

Ruqi Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhangquan and Zhang, Manyuan and Yu, Xinlei and Luo, Xufang and Sun, Mingze and Pan, Zihao and An, Xiang and Feng, Yan and Pei, Peng and Cai, Xunliang and Huang, Ruqi},
  title     = {Think with {3D}: Geometric Imagination Grounded Spatial Reasoning from Limited Views},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2613--2624},
}
Neural Field-Based 3D Surface Reconstruction of Microstructures from Multi-Detector Signals in Scanning Electron Microscopy: Shuo Chen,

Yijin Li,

Xi Zheng,

Guofeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Shuo and Li, Yijin and Zheng, Xi and Zhang, Guofeng},
  title     = {Neural Field-Based {3D} Surface Reconstruction of Microstructures from Multi-Detector Signals in Scanning Electron Microscopy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7266--7277},
}
TerraScope: Pixel-Grounded Visual Reasoning for Earth Observation: Yan Shu,

Bin Ren,

Zhitong Xiong,

Xiao Xiang Zhu,

Begüm Demir,

Nicu Sebe,

Paolo Rota; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shu_2026_CVPR,
  author    = {Shu, Yan and Ren, Bin and Xiong, Zhitong and Zhu, Xiao Xiang and Demir, Beg{\"u}m and Sebe, Nicu and Rota, Paolo},
  title     = {{TerraScope}: Pixel-Grounded Visual Reasoning for {Earth} Observation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16712--16722},
}
RAVEN: Erasing Invisible Watermarks via Novel View Synthesis: Fahad Shamshad,

Nils Lukas,

Karthik Nandakumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shamshad_2026_CVPR,
  author    = {Shamshad, Fahad and Lukas, Nils and Nandakumar, Karthik},
  title     = {{RAVEN}: Erasing Invisible Watermarks via Novel View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {82--91},
}
FedMPT: Federated Multi-Label Prompt Tuning of Vision-Language Models: Xucong Wang,

Pengkun Wang,

Zhe Zhao,

Liheng Yu,

Shuang Wang,

Yang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xucong and Wang, Pengkun and Zhao, Zhe and Yu, Liheng and Wang, Shuang and Wang, Yang},
  title     = {{FedMPT}: Federated Multi-Label Prompt Tuning of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17226--17236},
}
Prune Wisely, Reconstruct Sharply: Compact 3D Gaussian Splatting via Adaptive Pruning and Difference-of-Gaussian Primitives: Haoran Wang,

Guoxi Huang,

Fan Zhang,

David Bull,

Nantheera Anantrasirichai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Haoran and Huang, Guoxi and Zhang, Fan and Bull, David and Anantrasirichai, Nantheera},
  title     = {Prune Wisely, Reconstruct Sharply: Compact {3D} {Gaussian} Splatting via Adaptive Pruning and Difference-of-{Gaussian} Primitives},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11716--11725},
}
Deconstructing the Failure of Ideal Noise Correction: A Three-Pillar Diagnosis: Chen Feng,

Zhuo Zhi,

Zhao Huang,

Jiawei Ge,

Ling Xiao,

Nicu Sebe,

Georgios Tzimiropoulos,

Ioannis Patras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Chen and Zhi, Zhuo and Huang, Zhao and Ge, Jiawei and Xiao, Ling and Sebe, Nicu and Tzimiropoulos, Georgios and Patras, Ioannis},
  title     = {Deconstructing the Failure of Ideal Noise Correction: A Three-Pillar Diagnosis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34512--34523},
}
Temporal Interaction in Spiking Transformers with Multi-Delay Mixer: Kexin Shi,

Hanwen Liu,

Zeyang Song,

Yang Liu,

Jieyuan Zhang,

Shuai Wang,

Jibin Wu,

Malu Zhang,

Yang Yang; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Kexin and Liu, Hanwen and Song, Zeyang and Liu, Yang and Zhang, Jieyuan and Wang, Shuai and Wu, Jibin and Zhang, Malu and Yang, Yang},
  title     = {Temporal Interaction in Spiking Transformers with Multi-Delay Mixer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34555--34565},
}
Fine-Grained GRPO for Precise Preference Alignment in Flow Models: Yujie Zhou,

Pengyang Ling,

Jiazi Bu,

Yibin Wang,

Yuhang Zang,

Jiaqi Wang,

Li Niu,

Guangtao Zhai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Yujie and Ling, Pengyang and Bu, Jiazi and Wang, Yibin and Zang, Yuhang and Wang, Jiaqi and Niu, Li and Zhai, Guangtao},
  title     = {Fine-Grained {GRPO} for Precise Preference Alignment in Flow Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20045--20054},
}
Paparazzo: Active Mapping of Moving 3D Objects: Davide Allegro,

Shiyao Li,

Stefano Ghidoni,

Vincent Lepetit; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Allegro_2026_CVPR,
  author    = {Allegro, Davide and Li, Shiyao and Ghidoni, Stefano and Lepetit, Vincent},
  title     = {Paparazzo: Active Mapping of Moving {3D} Objects},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12586--12594},
}
Attribute-Preserving Pseudo-Labeling for Diffusion-Based Face Swapping: Jiwon Kang,

Yeji Choi,

JoungBin Lee,

Wooseok Jang,

Jinhyeok Choi,

Taekeun Kang,

Yongjae Park,

Myungin Kim,

Seungryong Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Jiwon and Choi, Yeji and Lee, JoungBin and Jang, Wooseok and Choi, Jinhyeok and Kang, Taekeun and Park, Yongjae and Kim, Myungin and Kim, Seungryong},
  title     = {Attribute-Preserving Pseudo-Labeling for Diffusion-Based Face Swapping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18651--18661},
}
Sketch2Colab: Sketch-Conditioned Multi-Human Animation via Controllable Flow Distillation: Divyanshu Daiya,

Aniket Bera; [pdf] [arXiv]
[bibtex]
@InProceedings{Daiya_2026_CVPR,
  author    = {Daiya, Divyanshu and Bera, Aniket},
  title     = {{Sketch2Colab}: Sketch-Conditioned Multi-Human Animation via Controllable Flow Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30675--30685},
}
Efficient Equivariant Transformer for Self-Driving Agent Modeling: Scott Xu,

Dian Chen,

Kelvin Wong,

Chris Zhang,

Kion Fallah,

Raquel Urtasun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Scott and Chen, Dian and Wong, Kelvin and Zhang, Chris and Fallah, Kion and Urtasun, Raquel},
  title     = {Efficient Equivariant Transformer for Self-Driving Agent Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32164--32172},
}
Temporal Representation Enhancement (TRE): Learning to Forget Dominant Patterns for Enhanced Temporal Spiking Features: Wei Liu,

Li Yang,

Yufei Wang,

Han Xiao,

Boyu Cai,

Weiming Hu; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Wei and Yang, Li and Wang, Yufei and Xiao, Han and Cai, Boyu and Hu, Weiming},
  title     = {Temporal Representation Enhancement ({TRE}): Learning to Forget Dominant Patterns for Enhanced Temporal Spiking Features},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34480--34490},
}
Dynamic Token Reweighting for Robust Vision-Language Models: Tanqiu Jiang,

Jiacheng Liang,

Rongyi Zhu,

Jiawei Zhou,

Fenglong Ma,

Ting Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Tanqiu and Liang, Jiacheng and Zhu, Rongyi and Zhou, Jiawei and Ma, Fenglong and Wang, Ting},
  title     = {Dynamic Token Reweighting for Robust Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24481--24491},
}
Paper2Figure: A Multi-Agent Collaborative System for Figure Generation Towards Academic Research Paper: Siwei Han,

Haonian Ji,

Siyang Xin,

Juanquan Shi,

Shi Qiu,

Xinyu Ye,

Peng Xia,

Jiaqi Liu,

Zhaorun Chen,

Yiyang Zhou,

Linjie Li,

Lijuan Wang,

Huaxiu Yao; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Siwei and Ji, Haonian and Xin, Siyang and Shi, Juanquan and Qiu, Shi and Ye, Xinyu and Xia, Peng and Liu, Jiaqi and Chen, Zhaorun and Zhou, Yiyang and Li, Linjie and Wang, Lijuan and Yao, Huaxiu},
  title     = {{Paper2Figure}: A Multi-Agent Collaborative System for Figure Generation Towards Academic Research Paper},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29157--29166},
}
OctoT2I: A Self-Evolving Agentic Text-to-Image Router: Xu Jiang,

Bin Chen,

Gehui Li,

Yule Duan,

Ronggang Wang,

Jian Zhang; [pdf]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Xu and Chen, Bin and Li, Gehui and Duan, Yule and Wang, Ronggang and Zhang, Jian},
  title     = {{OctoT2I}: A Self-Evolving Agentic Text-to-Image Router},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31628--31638},
}
Chorus: Multi-Teacher Pretraining for Holistic 3D Gaussian Scene Encoding: Yue Li,

Qi Ma,

Runyi Yang,

Mengjiao Ma,

Bin Ren,

Nikola Popovic,

Nicu Sebe,

Theo Gevers,

Luc Van Gool,

Danda Pani Paudel,

Martin R. Oswald; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yue and Ma, Qi and Yang, Runyi and Ma, Mengjiao and Ren, Bin and Popovic, Nikola and Sebe, Nicu and Gevers, Theo and Van Gool, Luc and Paudel, Danda Pani and Oswald, Martin R.},
  title     = {Chorus: Multi-Teacher Pretraining for Holistic {3D} {Gaussian} Scene Encoding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21431--21442},
}
FlowFM: Advancing Dark Optical Flow Estimation with Flow Matching: Fengyuan Zuo,

Haiyan Jin,

Yuanlin Zhang,

Zhaolin Xiao,

Bin Wang,

Yuerong Mu; [pdf] [supp]
[bibtex]
@InProceedings{Zuo_2026_CVPR,
  author    = {Zuo, Fengyuan and Jin, Haiyan and Zhang, Yuanlin and Xiao, Zhaolin and Wang, Bin and Mu, Yuerong},
  title     = {{FlowFM}: Advancing Dark Optical Flow Estimation with Flow Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6837--6846},
}
Residual Diffusion Bridge Model for Image Restoration: Hebaixu Wang,

Jing Zhang,

Haoyang Chen,

Haonan Guo,

Di Wang,

Jiayi Ma,

Bo Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Hebaixu and Zhang, Jing and Chen, Haoyang and Guo, Haonan and Wang, Di and Ma, Jiayi and Du, Bo},
  title     = {Residual Diffusion Bridge Model for Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8375--8386},
}
Rewis3d: Reconstruction Improves Weakly-Supervised Semantic Segmentation: Jonas Ernst,

Wolfgang Boettcher,

Lukas Hoyer,

Jan Eric Lenssen,

Bernt Schiele; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ernst_2026_CVPR,
  author    = {Ernst, Jonas and Boettcher, Wolfgang and Hoyer, Lukas and Lenssen, Jan Eric and Schiele, Bernt},
  title     = {Rewis3d: Reconstruction Improves Weakly-Supervised Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13091--13101},
}
Keep it SymPL: Symbolic Projective Layout for Allocentric Spatial Reasoning in Vision-Language Models: Jaeyun Jang,

Seunghui Shin,

Taeho Park,

Hyoseok Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2026_CVPR,
  author    = {Jang, Jaeyun and Shin, Seunghui and Park, Taeho and Hwang, Hyoseok},
  title     = {Keep it {SymPL}: Symbolic Projective Layout for Allocentric Spatial Reasoning in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9604--9614},
}
QueryMe: Query-Driven Open-Vocabulary 3D Object Affordances Grounding from Multimodal Evidence: Weiyu Zhao,

Ru Li,

Jiaqi Liu,

Sizhe Zhao,

Qinglin Liu,

Shengping Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Weiyu and Li, Ru and Liu, Jiaqi and Zhao, Sizhe and Liu, Qinglin and Zhang, Shengping},
  title     = {{QueryMe}: Query-Driven Open-Vocabulary {3D} Object Affordances Grounding from Multimodal Evidence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2603--2612},
}
Portable Active Learning for Object Detection: Rashi Sharma,

Justin Timothy C. Bersamin,

Karthikk Subramanian; [pdf] [arXiv]
[bibtex]
@InProceedings{Sharma_2026_CVPR,
  author    = {Sharma, Rashi and Bersamin, Justin Timothy C. and Subramanian, Karthikk},
  title     = {Portable Active Learning for Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25514--25523},
}
Dynamic Label Noise Suppression with Optimal Teacher Pool for Facial Expression Recognition: Yuzhuang Yang,

Xiaolin Tian,

Qigong Sun; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yuzhuang and Tian, Xiaolin and Sun, Qigong},
  title     = {Dynamic Label Noise Suppression with Optimal Teacher Pool for Facial Expression Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32389--32398},
}
LASAR: Towards Spatio-temporal Reasoning with Latent Cognitive Map: Jinzhou Tang,

Sidi Liu,

Waikit Xiu,

Weixing Chen,

Keze Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Jinzhou and Liu, Sidi and Xiu, Waikit and Chen, Weixing and Wang, Keze},
  title     = {{LASAR}: Towards Spatio-temporal Reasoning with Latent Cognitive Map},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23880--23890},
}
DREAM: Document Recognition with Explicit Adaptive Memory: Tianqi Zhao,

Di Wu,

Liangrui Peng,

Yifan Huang,

Kemeng Zhao,

Shuo Li,

Zhiyu Li,

Yizhu Wang,

Borui Jiang,

Yuyang Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Tianqi and Wu, Di and Peng, Liangrui and Huang, Yifan and Zhao, Kemeng and Li, Shuo and Li, Zhiyu and Wang, Yizhu and Jiang, Borui and Li, Yuyang},
  title     = {{DREAM}: Document Recognition with Explicit Adaptive Memory},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2715--2724},
}
YOSE: You Only Select Essential Tokens for Efficient DiT-based Video Object Removal: Chenyang Wu,

Lina Lei,

Fan Li,

Chunle Guo,

Dehong Kong,

Xinran Qin,

Zhixin Wang,

Mingming Cheng,

Chongyi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Chenyang and Lei, Lina and Li, Fan and Guo, Chunle and Kong, Dehong and Qin, Xinran and Wang, Zhixin and Cheng, Mingming and Li, Chongyi},
  title     = {{YOSE}: You Only Select Essential Tokens for Efficient {DiT}-based Video Object Removal},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32926--32935},
}
RHINO: Reconstructing Human Interactions with Novel Objects from Monocular Videos: Lixin Xue,

Chengwei Zheng,

Georgios Paschalidis,

Chen Guo,

Manuel Kaufmann,

Juan Zarate,

Dimitrios Tzionas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR,
  author    = {Xue, Lixin and Zheng, Chengwei and Paschalidis, Georgios and Guo, Chen and Kaufmann, Manuel and Zarate, Juan and Tzionas, Dimitrios},
  title     = {{RHINO}: Reconstructing Human Interactions with Novel Objects from Monocular Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13835--13845},
}
Geoint-R1: Formalizing Multimodal Geometric Reasoning with Dynamic Auxiliary Constructions: Jingxuan Wei,

Caijun Jia,

Qi Chen,

Honghao He,

Linzhuang Sun,

Conghui He,

Lijun Wu,

Bihui Yu,

Cheng Tan; [pdf] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Jingxuan and Jia, Caijun and Chen, Qi and He, Honghao and Sun, Linzhuang and He, Conghui and Wu, Lijun and Yu, Bihui and Tan, Cheng},
  title     = {{Geoint-R1}: Formalizing Multimodal Geometric Reasoning with Dynamic Auxiliary Constructions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2547--2556},
}
Geometric Neural Distance Fields for Learning Human Motion Priors: Zhengdi Yu,

Simone Foti,

Linguang Zhang,

Amy Zhao,

Cem Keskin,

Stefanos Zafeiriou,

Tolga Birdal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Zhengdi and Foti, Simone and Zhang, Linguang and Zhao, Amy and Keskin, Cem and Zafeiriou, Stefanos and Birdal, Tolga},
  title     = {Geometric Neural Distance Fields for Learning Human Motion Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2232--2242},
}
FARMER: Flow AutoRegressive Transformer over Pixels: Guangting Zheng,

Qinyu Zhao,

Tao Yang,

Fei Xiao,

Zhijie Lin,

Jie Wu,

Jiajun Deng,

Yanyong Zhang,

Rui Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Guangting and Zhao, Qinyu and Yang, Tao and Xiao, Fei and Lin, Zhijie and Wu, Jie and Deng, Jiajun and Zhang, Yanyong and Zhu, Rui},
  title     = {{FARMER}: Flow {AutoRegressive} Transformer over Pixels},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25730--25741},
}
IBISAgent: Reinforcing Pixel-Level Visual Reasoning in MLLMs for Universal Biomedical Object Referring and Segmentation: Yankai Jiang,

Qiaoru Li,

Binlu Xu,

Haoran Sun,

Chao Ding,

Junting Dong,

Yuxiang Cai,

Xuhong Zhang,

Jianwei Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
    author    = {Jiang, Yankai and Li, Qiaoru and Xu, Binlu and Sun, Haoran and Ding, Chao and Dong, Junting and Cai, Yuxiang and Zhang, Xuhong and Yin, Jianwei},
    title     = {IBISAgent: Reinforcing Pixel-Level Visual Reasoning in MLLMs for Universal Biomedical Object Referring and Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {20996-21005}
}
CCCaption: Dual-Reward Reinforcement Learning for Complete and Correct Image Captioning: Zhijiang Tang,

Linhua Wang,

Jiaxin Qi,

Weihao Jiang,

Peng Hou,

Anxiang Zeng,

Jianqiang Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
    author    = {Tang, Zhijiang and Wang, Linhua and Qi, Jiaxin and Jiang, Weihao and Hou, Peng and Zeng, Anxiang and Huang, Jianqiang},
    title     = {CCCaption: Dual-Reward Reinforcement Learning for Complete and Correct Image Captioning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {22153-22163}
}
Virtual Nodes Guided Dynamic Graph Neural Network for Brain Tumor Segmentation with Missing Modalities: Sha Tao,

Jiao Pan,

Yu Guo,

Chao Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2026_CVPR,
    author    = {Tao, Sha and Pan, Jiao and Guo, Yu and Yao, Chao},
    title     = {Virtual Nodes Guided Dynamic Graph Neural Network for Brain Tumor Segmentation with Missing Modalities},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {37528-37537}
}
LinVideo: A Post-Training Framework towards O(n) Attention in Efficient Video Generation: Yushi Huang,

Xingtong Ge,

Ruihao Gong,

Chengtao Lv,

Jun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Yushi and Ge, Xingtong and Gong, Ruihao and Lv, Chengtao and Zhang, Jun},
    title     = {LinVideo: A Post-Training Framework towards O(n) Attention in Efficient Video Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {23398-23408}
}
Tell2Adapt: A Unified Framework for Source Free Unsupervised Domain Adaptation via Vision Foundation Model: Yulong Shi,

Shijie Li,

Ziyi Li,

Lin Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
    author    = {Shi, Yulong and Li, Shijie and Li, Ziyi and Qi, Lin},
    title     = {Tell2Adapt: A Unified Framework for Source Free Unsupervised Domain Adaptation via Vision Foundation Model},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {6941-6950}
}
Probing and Bridging Geometry-Interaction Cues for Affordance Reasoning in Vision Foundation Models: Qing Zhang,

Xuesong Li,

Jing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Qing and Li, Xuesong and Zhang, Jing},
    title     = {Probing and Bridging Geometry-Interaction Cues for Affordance Reasoning in Vision Foundation Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {2526-2536}
}
Unifying Perception and Action: A Hybrid-Modality Pipeline with Implicit Visual Chain-of-Thought for Robotic Action Generation: Xiangkai Ma,

Lekai Xing,

Han Zhang,

Wenzhong Li,

Sanglu Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
    author    = {Ma, Xiangkai and Xing, Lekai and Zhang, Han and Li, Wenzhong and Lu, Sanglu},
    title     = {Unifying Perception and Action: A Hybrid-Modality Pipeline with Implicit Visual Chain-of-Thought for Robotic Action Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {22380-22390}
}
The Devil Is in Gradient Entanglement: Energy-Aware Gradient Coordinator for Robust Generalized Category Discovery: Haiyang Zheng,

Nan Pu,

Yaqi Cai,

Teng Long,

Wenjing Li,

Nicu Sebe,

Zhun Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
    author    = {Zheng, Haiyang and Pu, Nan and Cai, Yaqi and Long, Teng and Li, Wenjing and Sebe, Nicu and Zhong, Zhun},
    title     = {The Devil Is in Gradient Entanglement: Energy-Aware Gradient Coordinator for Robust Generalized Category Discovery},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {3563-3573}
}
MatAnyone 2: Scaling Video Matting via a Learned Quality Evaluator: Peiqing Yang,

Shangchen Zhou,

Kai Hao,

Qingyi Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Peiqing and Zhou, Shangchen and Hao, Kai and Tao, Qingyi},
    title     = {MatAnyone 2: Scaling Video Matting via a Learned Quality Evaluator},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {37476-37485}
}
IF-Bench: Benchmarking and Enhancing MLLMs for Infrared Images with Generative Visual Prompting: Tao Zhang,

Yuyang Hong,

Yang Xia,

Kun Ding,

Zeyu Zhang,

Ying Wang,

Shiming Xiang,

Chunhong Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Tao and Hong, Yuyang and Xia, Yang and Ding, Kun and Zhang, Zeyu and Wang, Ying and Xiang, Shiming and Pan, Chunhong},
    title     = {IF-Bench: Benchmarking and Enhancing MLLMs for Infrared Images with Generative Visual Prompting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {8205-8215}
}
DENALI: A Dataset Enabling Non-Line-of-Sight Spatial Reasoning with Low-Cost LiDARs: Nikhil Behari,

Diego Rivero,

Luke Apostolides,

Suman Ghosh,

Paul Pu Liang,

Ramesh Raskar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Behari_2026_CVPR,
    author    = {Behari, Nikhil and Rivero, Diego and Apostolides, Luke and Ghosh, Suman and Liang, Paul Pu and Raskar, Ramesh},
    title     = {DENALI: A Dataset Enabling Non-Line-of-Sight Spatial Reasoning with Low-Cost LiDARs},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {3046-3055}
}
VOLD: Reasoning Transfer from LLMs to Vision-Language Models via On-Policy Distillation: Walid Bousselham,

Hilde Kuehne,

Cordelia Schmid; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bousselham_2026_CVPR,
    author    = {Bousselham, Walid and Kuehne, Hilde and Schmid, Cordelia},
    title     = {VOLD: Reasoning Transfer from LLMs to Vision-Language Models via On-Policy Distillation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {26209-26218}
}
FoV-Net: Rotation-Invariant CAD B-rep Learning via Field-of-View Ray Casting: Matteo Ballegeer,

Dries F. Benoit; [pdf] [arXiv]
[bibtex]
@InProceedings{Ballegeer_2026_CVPR,
    author    = {Ballegeer, Matteo and Benoit, Dries F.},
    title     = {FoV-Net: Rotation-Invariant CAD B-rep Learning via Field-of-View Ray Casting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {3024-3034}
}
Harmonic Canvas: Inversion-Free Editing for Visually-Guided Music Style Transfer: Yue Lei,

Siqi Yang,

Ting Zhong,

Fan Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Lei_2026_CVPR,
    author    = {Lei, Yue and Yang, Siqi and Zhong, Ting and Zhou, Fan},
    title     = {Harmonic Canvas: Inversion-Free Editing for Visually-Guided Music Style Transfer},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {29727-29737}
}
VideoCoF: Unified Video Editing with Temporal Reasoner: Xiangpeng Yang,

Ji Xie,

Yiyuan Yang,

Yue Ma,

Yan Huang,

Min Xu,

Qiang Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Xiangpeng and Xie, Ji and Yang, Yiyuan and Ma, Yue and Huang, Yan and Xu, Min and Wu, Qiang},
    title     = {VideoCoF: Unified Video Editing with Temporal Reasoner},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {37940-37949}
}
RE-VLM: Event-Augmented Vision-Language Model for Scene Understanding: Hanqing Liu,

Mingjie Liu,

Luoping Cui,

Endian Lin,

Donghong Jiang,

Chuang Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Hanqing and Liu, Mingjie and Cui, Luoping and Lin, Endian and Jiang, Donghong and Zhu, Chuang},
    title     = {RE-VLM: Event-Augmented Vision-Language Model for Scene Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {10377-10386}
}
Scaling-Aware Data Selection for End-to-End Autonomous Driving Systems: Tolga Dimlioglu,

Nadine Chang,

Maying Shen,

Rafid Mahmood,

Jose M. Alvarez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dimlioglu_2026_CVPR,
    author    = {Dimlioglu, Tolga and Chang, Nadine and Shen, Maying and Mahmood, Rafid and Alvarez, Jose M.},
    title     = {Scaling-Aware Data Selection for End-to-End Autonomous Driving Systems},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {17798-17808}
}
Neural Dynamic GI: Random-Access Neural Compression for Temporal Lightmaps in Dynamic Lighting Environments: Jianhui Wu,

Jian Zhou,

Zhi Zhou,

Zhangjin Huang,

Chao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Jianhui and Zhou, Jian and Zhou, Zhi and Huang, Zhangjin and Li, Chao},
    title     = {Neural Dynamic GI: Random-Access Neural Compression for Temporal Lightmaps in Dynamic Lighting Environments},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {5316-5325}
}
A Faster Path to Continual Learning: Wei Li,

Hangjie Yuan,

Zixiang Zhao,

Borui Kang,

Ziwei Liu,

Tao Feng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Wei and Yuan, Hangjie and Zhao, Zixiang and Kang, Borui and Liu, Ziwei and Feng, Tao},
    title     = {A Faster Path to Continual Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {25088-25098}
}
HAD: Heterogeneity-Aware Distillation for Lifelong Heterogeneous Learning: Xuerui Zhang,

Xuehao Wang,

Zhan Zhuang,

Linglan Zhao,

Ziyue Li,

Xinmin Zhang,

Zhihuan Song,

Yu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Xuerui and Wang, Xuehao and Zhuang, Zhan and Zhao, Linglan and Li, Ziyue and Zhang, Xinmin and Song, Zhihuan and Zhang, Yu},
    title     = {HAD: Heterogeneity-Aware Distillation for Lifelong Heterogeneous Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {10863-10873}
}
PaQ-DETR: Learning Pattern and Quality-Aware Dynamic Queries for Object Detection: Zhengjian Kang,

Jun Zhuang,

Kangtong Mo,

Qi Chen,

Rui Liu,

Ye Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
    author    = {Kang, Zhengjian and Zhuang, Jun and Mo, Kangtong and Chen, Qi and Liu, Rui and Zhang, Ye},
    title     = {PaQ-DETR: Learning Pattern and Quality-Aware Dynamic Queries for Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {25504-25513}
}
VCU-Bridge: Hierarchical Visual Connotation Understanding via Semantic Bridging: Ming Zhong,

Yuanlei Wang,

Liuzhou Zhang,

Ruichuan An,

Renrui Zhang,

Hao Liang,

Ming Lu,

Ying Shen,

Wentao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
    author    = {Zhong, Ming and Wang, Yuanlei and Zhang, Liuzhou and An, Ruichuan and Zhang, Renrui and Liang, Hao and Lu, Ming and Shen, Ying and Zhang, Wentao},
    title     = {VCU-Bridge: Hierarchical Visual Connotation Understanding via Semantic Bridging},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {26187-26197}
}
Cross-Modal Emotion Transfer for Emotion Editing in Talking Face Video: Chanhyuk Choi,

Taesoo Kim,

Donggyu Lee,

Siyeol Jung,

Taehwan Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
    author    = {Choi, Chanhyuk and Kim, Taesoo and Lee, Donggyu and Jung, Siyeol and Kim, Taehwan},
    title     = {Cross-Modal Emotion Transfer for Emotion Editing in Talking Face Video},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {1759-1770}
}
Seeing What Matters: Visual Preference Policy Optimization for Visual Generation: Ziqi Ni,

Yuanzhi Liang,

Rui Li,

Yi Zhou,

Haibin Huang,

Chi Zhang,

Xuelong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2026_CVPR,
    author    = {Ni, Ziqi and Liang, Yuanzhi and Li, Rui and Zhou, Yi and Huang, Haibin and Zhang, Chi and Li, Xuelong},
    title     = {Seeing What Matters: Visual Preference Policy Optimization for Visual Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {27260-27269}
}
Harnessing the Power of Foundation Models for Accurate Material Classification: Qingran Lin,

Fengwei Yang,

Chaolun Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
    author    = {Lin, Qingran and Yang, Fengwei and Zhu, Chaolun},
    title     = {Harnessing the Power of Foundation Models for Accurate Material Classification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {3636-3645}
}
When Transformers Meet Mamba: A Hybrid Transformer-Mamba Network for Video Object Detection: Qiang Qi,

Xiao Wang,

Zongyuan Du,

Yu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2026_CVPR,
    author    = {Qi, Qiang and Wang, Xiao and Du, Zongyuan and Zhang, Yu},
    title     = {When Transformers Meet Mamba: A Hybrid Transformer-Mamba Network for Video Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {18492-18502}
}
Motion 3-to-4: 3D Motion Reconstruction for 4D Synthesis: Hongyuan Chen,

Xingyu Chen,

Zexiang Xu,

Anpei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Hongyuan and Chen, Xingyu and Xu, Zexiang and Chen, Anpei},
    title     = {Motion 3-to-4: 3D Motion Reconstruction for 4D Synthesis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {28947-28958}
}
Generative Modeling of Weights: Generalization or Memorization?: Boya Zeng,

Yida Yin,

Zhiqiu Xu,

Zhuang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
    author    = {Zeng, Boya and Yin, Yida and Xu, Zhiqiu and Liu, Zhuang},
    title     = {Generative Modeling of Weights: Generalization or Memorization?},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {41974-41984}
}
HorizonForge: Driving Scene Editing with Any Trajectories and Any Vehicles: Yifan Wang,

Francesco Pittaluga,

Zaid Tasneem,

Chenyu You,

Manmohan Chandraker,

Ziyu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yifan and Pittaluga, Francesco and Tasneem, Zaid and You, Chenyu and Chandraker, Manmohan and Jiang, Ziyu},
    title     = {HorizonForge: Driving Scene Editing with Any Trajectories and Any Vehicles},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {24895-24905}
}
EMAD: Evidence-Centric Grounded Multimodal Diagnosis for Alzheimer's Disease: Qiuhui Chen,

Xuancheng Yao,

Zhenglei Zhou,

Xinyue Hu,

Yi Hong; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Qiuhui and Yao, Xuancheng and Zhou, Zhenglei and Hu, Xinyue and Hong, Yi},
    title     = {EMAD: Evidence-Centric Grounded Multimodal Diagnosis for Alzheimer's Disease},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {23031-23040}
}
OnlineHMR: Video-based Online World-Grounded Human Mesh Recovery: Yiwen Zhao,

Ce Zheng,

Yufu Wang,

Hsueh-Han Daniel Yang,

Liting Wen,

László A. Jeni; [pdf] [supp] [arXiv]
[bibtex]
@comment{Zhao_2026_CVPR: accented letters in the last author's given name are written as brace-delimited BibTeX special characters so that sorting and label generation treat each as a single letter.}
@InProceedings{Zhao_2026_CVPR,
    author    = {Zhao, Yiwen and Zheng, Ce and Wang, Yufu and Yang, Hsueh-Han Daniel and Wen, Liting and Jeni, L{\'a}szl{\'o} A.},
    title     = {OnlineHMR: Video-based Online World-Grounded Human Mesh Recovery},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {13951-13961}
}
I'm a Map! Interpretable Motion-Attentive Maps: Spatio-Temporally Localizing Concepts in Video Diffusion Transformers: Youngjun Jun,

Seil Kang,

Woojung Han,

Seong Jae Hwang; [pdf] [supp]
[bibtex]
@InProceedings{Jun_2026_CVPR,
    author    = {Jun, Youngjun and Kang, Seil and Han, Woojung and Hwang, Seong Jae},
    title     = {I'm a Map! Interpretable Motion-Attentive Maps: Spatio-Temporally Localizing Concepts in Video Diffusion Transformers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {11525-11535}
}
Parallel Jacobi Decoding for Fast Autoregressive Image Generation: Boya Liao,

Ying Li,

Siyong Jian,

Huan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2026_CVPR,
    author    = {Liao, Boya and Li, Ying and Jian, Siyong and Wang, Huan},
    title     = {Parallel Jacobi Decoding for Fast Autoregressive Image Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {9008-9018}
}
Addressing Exacerbated Attention Sink for Source-Free Cross-Domain Few-Shot Learning: Shuai Yi,

Yixiong Zou,

Yuhua Li,

Ruixuan Li; [pdf] [supp]
[bibtex]
@InProceedings{Yi_2026_CVPR,
    author    = {Yi, Shuai and Zou, Yixiong and Li, Yuhua and Li, Ruixuan},
    title     = {Addressing Exacerbated Attention Sink for Source-Free Cross-Domain Few-Shot Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {29494-29503}
}
MODIX: A Training-Free Multimodal Information-Driven Positional Index Scaling for Vision-Language Models: Ruoxiang Huang,

Zhen Yuan; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Ruoxiang and Yuan, Zhen},
    title     = {MODIX: A Training-Free Multimodal Information-Driven Positional Index Scaling for Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {31534-31543}
}
HamiPose: Hamiltonian Optimization for Unsupervised Domain Adaptive Pose Estimation: Jiawen Li,

Fei Jiang,

Dandan Zhu,

Aimin Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Jiawen and Jiang, Fei and Zhu, Dandan and Zhou, Aimin},
    title     = {HamiPose: Hamiltonian Optimization for Unsupervised Domain Adaptive Pose Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {13856-13865}
}
CGU-Bayes: Causal Graph Uncertainty-Guided Bayesian Inference for Domain Generalization: Naiyu Yin,

Hanjing Wang,

Yue Yu,

Tian Gao,

Amit Dhurandhar,

Chung-Hao Lee,

Qiang Ji; [pdf] [supp]
[bibtex]
@InProceedings{Yin_2026_CVPR,
    author    = {Yin, Naiyu and Wang, Hanjing and Yu, Yue and Gao, Tian and Dhurandhar, Amit and Lee, Chung-Hao and Ji, Qiang},
    title     = {CGU-Bayes: Causal Graph Uncertainty-Guided Bayesian Inference for Domain Generalization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {10522-10532}
}
MSCD-GS: Motion-Separated Cooperative Deblurring Dynamic Reconstruction via Gaussian Splatting: Yongjian Liao,

Xu Zou,

Wenjun Chen,

Huixuan Li,

Xiaoen Xie,

Chunxi Li,

Shixiang Huang,

Gang Zhang,

Jiahuan Zhou,

Sheng Zhong,

Luxin Yan; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2026_CVPR,
    author    = {Liao, Yongjian and Zou, Xu and Chen, Wenjun and Li, Huixuan and Xie, Xiaoen and Li, Chunxi and Huang, Shixiang and Zhang, Gang and Zhou, Jiahuan and Zhong, Sheng and Yan, Luxin},
    title     = {MSCD-GS: Motion-Separated Cooperative Deblurring Dynamic Reconstruction via Gaussian Splatting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {11726-11735}
}
Activation Matters: Test-time Activated Negative Labels for OOD Detection with Vision-Language Models: Yabin Zhang,

Maya Varma,

Yunhe Gao,

Jean-Benoit Delbrouck,

Jiaming Liu,

Chong Wang,

Curtis Langlotz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Yabin and Varma, Maya and Gao, Yunhe and Delbrouck, Jean-Benoit and Liu, Jiaming and Wang, Chong and Langlotz, Curtis},
    title     = {Activation Matters: Test-time Activated Negative Labels for OOD Detection with Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {17462-17473}
}
3D-Aware Multi-Task Learning with Cross-View Correlations for Dense Scene Understanding: Xiaoye Wang,

Chen Tang,

Xiangyu Yue,

Wei-Hong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Xiaoye and Tang, Chen and Yue, Xiangyu and Li, Wei-Hong},
    title     = {3D-Aware Multi-Task Learning with Cross-View Correlations for Dense Scene Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {5793-5803}
}
CoIn3D: Revisiting Configuration-Invariant Multi-Camera 3D Object Detection: Zhaonian Kuang,

Rui Ding,

Haotian Wang,

Xinhu Zheng,

Meng Yang,

Gang Hua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kuang_2026_CVPR,
    author    = {Kuang, Zhaonian and Ding, Rui and Wang, Haotian and Zheng, Xinhu and Yang, Meng and Hua, Gang},
    title     = {CoIn3D: Revisiting Configuration-Invariant Multi-Camera 3D Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {40707-40716}
}
FisherPoser: Human Motion Estimation from Sparse Observations with Hierarchical Region-Wise Fisher-Matrix Uncertainty Modeling: Songpengcheng Xia,

Qingyu Zhang,

Zhuo Su,

Jiarui Yang,

Zengyuan Lai,

Qi Wu,

Ling Pei; [pdf] [supp]
[bibtex]
@InProceedings{Xia_2026_CVPR,
    author    = {Xia, Songpengcheng and Zhang, Qingyu and Su, Zhuo and Yang, Jiarui and Lai, Zengyuan and Wu, Qi and Pei, Ling},
    title     = {FisherPoser: Human Motion Estimation from Sparse Observations with Hierarchical Region-Wise Fisher-Matrix Uncertainty Modeling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {28413-28423}
}
Multigrain-aware Semantic Prototype Scanning and Tri-Token Prompt Learning Embraced High-Order RWKV for Pan-Sharpening: Junfeng Li,

Wenyang Zhou,

Xueheng Li,

Xuanhua He,

Jianhou Gan,

Wenqi Ren; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Junfeng and Zhou, Wenyang and Li, Xueheng and He, Xuanhua and Gan, Jianhou and Ren, Wenqi},
    title     = {Multigrain-aware Semantic Prototype Scanning and Tri-Token Prompt Learning Embraced High-Order RWKV for Pan-Sharpening},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {13234-13243}
}
Action Motifs: Self-Supervised Hierarchical Representation of Human Body Movements: Genki Kinoshita,

Shu Nakamura,

Ryo Kawahara,

Shohei Nobuhara,

Yasutomo Kawanishi,

Ko Nishino; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kinoshita_2026_CVPR,
    author    = {Kinoshita, Genki and Nakamura, Shu and Kawahara, Ryo and Nobuhara, Shohei and Kawanishi, Yasutomo and Nishino, Ko},
    title     = {Action Motifs: Self-Supervised Hierarchical Representation of Human Body Movements},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {20139-20148}
}
StreamRAG: Enhancing Real-Time Video Understanding with Retrieval Augmentation: Junlin Xie,

Quanlong Zheng,

Ruifei Zhang,

Kuo Wang,

Yanhao Zhang,

Jinguo Luo,

Haonan Lu,

Xiang Wan,

Guanbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
    author    = {Xie, Junlin and Zheng, Quanlong and Zhang, Ruifei and Wang, Kuo and Zhang, Yanhao and Luo, Jinguo and Lu, Haonan and Wan, Xiang and Li, Guanbin},
    title     = {StreamRAG: Enhancing Real-Time Video Understanding with Retrieval Augmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {38870-38879}
}
Improving Vision-language Models with Perception-centric Process Reward Models: Yingqian Min,

Kun Zhou,

Yifan Li,

Yuhuan Wu,

Han Peng,

Yifan Du,

Wayne Xin Zhao,

Min Yang,

Ji-Rong Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Min_2026_CVPR,
    author    = {Min, Yingqian and Zhou, Kun and Li, Yifan and Wu, Yuhuan and Peng, Han and Du, Yifan and Zhao, Wayne Xin and Yang, Min and Wen, Ji-Rong},
    title     = {Improving Vision-language Models with Perception-centric Process Reward Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {33099-33109}
}
Repurposing 3D Generative Model for Autoregressive Layout Generation: Haoran Feng,

Yifan Niu,

Zehuan Huang,

Yang-Tian Sun,

Chunchao Guo,

Yuxin Peng,

Lu Sheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
    author    = {Feng, Haoran and Niu, Yifan and Huang, Zehuan and Sun, Yang-Tian and Guo, Chunchao and Peng, Yuxin and Sheng, Lu},
    title     = {Repurposing 3D Generative Model for Autoregressive Layout Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {3231-3243}
}
RnG: A Unified Transformer for Complete 3D Modeling from Partial Observations: Mochu Xiang,

Zhelun Shen,

Xuesong Li,

Jiahui Ren,

Jing Zhang,

Chen Zhao,

Shanshan Liu,

Haocheng Feng,

Jingdong Wang,

Yuchao Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
    author    = {Xiang, Mochu and Shen, Zhelun and Li, Xuesong and Ren, Jiahui and Zhang, Jing and Zhao, Chen and Liu, Shanshan and Feng, Haocheng and Wang, Jingdong and Dai, Yuchao},
    title     = {RnG: A Unified Transformer for Complete 3D Modeling from Partial Observations},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {406-416}
}
S2FT: Parameter-Efficient Fine-Tuning in Sparse Spectrum Domain: Baoquan Zhang,

Zhehao Yu,

Lisai Zhang,

Kenghong Lin,

Tianran Chen,

Yuxi Sun,

Yunming Ye,

Yao He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Baoquan and Yu, Zhehao and Zhang, Lisai and Lin, Kenghong and Chen, Tianran and Sun, Yuxi and Ye, Yunming and He, Yao},
    title     = {S2FT: Parameter-Efficient Fine-Tuning in Sparse Spectrum Domain},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {20191-20201}
}
DarkAct: A RGB-Thermal Dataset and Fusion Framework for Multimodal Low-Light Action Recognition: Yuanjun Tan,

Aoran Xiao,

Liqian Deng,

Zhigang Tu; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2026_CVPR,
    author    = {Tan, Yuanjun and Xiao, Aoran and Deng, Liqian and Tu, Zhigang},
    title     = {DarkAct: A RGB-Thermal Dataset and Fusion Framework for Multimodal Low-Light Action Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {27345-27356}
}
PAF: Perturbation-Aware Filtering for Open-Set Semi-Supervised Learning: Yinan Han,

Qing-Yuan Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
    author    = {Han, Yinan and Jiang, Qing-Yuan},
    title     = {PAF: Perturbation-Aware Filtering for Open-Set Semi-Supervised Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {24803-24812}
}
AKCMamba-YOLO: Selective State Space Models For Real-Time Object Detection: Long Chen,

Hui Wang,

Man Xu,

Zexuan Li,

Zizhu Fan; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Long and Wang, Hui and Xu, Man and Li, Zexuan and Fan, Zizhu},
    title     = {AKCMamba-YOLO: Selective State Space Models For Real-Time Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {4438-4447}
}
Global Information Thresholding for Sufficient and Necessary Circuits: Jegyeong Cho; [pdf]
[bibtex]
@InProceedings{Cho_2026_CVPR,
    author    = {Cho, Jegyeong},
    title     = {Global Information Thresholding for Sufficient and Necessary Circuits},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {3264-3273}
}
M3Grounder: Mask-Based Multi-Span and Multi-Granular Grounding for Document QA: Venkata Kesav Venna,

Sai Madhusudan Gunda,

Jyothi Swaroopa Jinka,

Hrithik Sagar Rachakonda,

Anirudh Srinivasan,

Ravi Kiran Sarvadevabhatla; [pdf] [supp]
[bibtex]
@InProceedings{Venna_2026_CVPR,
    author    = {Venna, Venkata Kesav and Gunda, Sai Madhusudan and Jinka, Jyothi Swaroopa and Rachakonda, Hrithik Sagar and Srinivasan, Anirudh and Sarvadevabhatla, Ravi Kiran},
    title     = {M3Grounder: Mask-Based Multi-Span and Multi-Granular Grounding for Document QA},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {23685-23695}
}
GeoRK2: Geometry-Guided Runge-Kutta Integration for Diffusion Transformer Acceleration: Chaoqun Sun,

Zongjing Fu,

Powei Chang,

Jinpeng Zhang,

Jianxiang Xiang,

Yukang Gao,

Chenyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Chaoqun and Fu, Zongjing and Chang, Powei and Zhang, Jinpeng and Xiang, Jianxiang and Gao, Yukang and Wang, Chenyu},
    title     = {GeoRK2: Geometry-Guided Runge-Kutta Integration for Diffusion Transformer Acceleration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {9404-9413}
}
Towards Persistence: Learning Topological Constraints for Event-based Small Object Detection: Shiman He,

Nuo Chen,

Xinyi Ying,

Yihang Luo,

Yangsi Shi,

Zaiping Lin,

Miao Li; [pdf]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Shiman and Chen, Nuo and Ying, Xinyi and Luo, Yihang and Shi, Yangsi and Lin, Zaiping and Li, Miao},
  title     = {Towards Persistence: Learning Topological Constraints for Event-based Small Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22291--22300},
}
Spatial Matters: Position-Guided 3D Referring Expression Segmentation: Yabing Wang,

Zhuotao Tian,

Le Wang,

Zheng Qin,

Sanping Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yabing and Tian, Zhuotao and Wang, Le and Qin, Zheng and Zhou, Sanping},
  title     = {Spatial Matters: Position-Guided {3D} Referring Expression Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39486--39496},
}
WonderZoom: Multi-Scale 3D World Generation: Jin Cao,

Hong-Xing Yu,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Jin and Yu, Hong-Xing and Wu, Jiajun},
  title     = {{WonderZoom}: Multi-Scale {3D} World Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5859--5869},
}
RF4D:Neural Radar Fields for Novel View Synthesis in Outdoor Dynamic Scenes: Jiarui Zhang,

Zhihao Li,

Chong Wang,

Bihan Wen; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jiarui and Li, Zhihao and Wang, Chong and Wen, Bihan},
  title     = {{RF4D}:Neural Radar Fields for Novel View Synthesis in Outdoor Dynamic Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15387--15397},
}
Concept-Aware Batch Sampling Improves Language-Image Pretraining: Adhiraj Ghosh,

Vishaal Udandarao,

Thao Nguyen,

Matteo Farina,

Mehdi Cherti,

Jenia Jitsev,

Sewoong Oh,

Elisa Ricci,

Ludwig Schmidt,

Matthias Bethge; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghosh_2026_CVPR,
  author    = {Ghosh, Adhiraj and Udandarao, Vishaal and Nguyen, Thao and Farina, Matteo and Cherti, Mehdi and Jitsev, Jenia and Oh, Sewoong and Ricci, Elisa and Schmidt, Ludwig and Bethge, Matthias},
  title     = {Concept-Aware Batch Sampling Improves Language-Image Pretraining},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3056--3068},
}
GeoFree-CoSeg: Unsupervised Point Cloud-Image Cross-Modal Co-Segmentation Without Geometric Alignment: Xin Duan,

Xiabi Liu,

Liyuan Pan; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2026_CVPR,
  author    = {Duan, Xin and Liu, Xiabi and Pan, Liyuan},
  title     = {{GeoFree-CoSeg}: Unsupervised Point Cloud-Image Cross-Modal Co-Segmentation Without Geometric Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10778--10788},
}
Negative Binomial Variational Autoencoders for Overdispersed Latent Modeling: Yixuan Zhang,

Jinhao Sheng,

Wenxin Zhang,

Quyu Kong,

Feng Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yixuan and Sheng, Jinhao and Zhang, Wenxin and Kong, Quyu and Zhou, Feng},
  title     = {Negative Binomial Variational Autoencoders for Overdispersed Latent Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16289--16298},
}
Any Resolution Any Geometry: From Multi-View To Multi-Patch: Wenqing Cui,

Zhenyu Li,

Mykola Lavreniuk,

Jian Shi,

Ramzi Idoughi,

Xiangjun Tang,

Peter Wonka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Wenqing and Li, Zhenyu and Lavreniuk, Mykola and Shi, Jian and Idoughi, Ramzi and Tang, Xiangjun and Wonka, Peter},
  title     = {Any Resolution Any Geometry: From Multi-View To Multi-Patch},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12576--12585},
}
Ultrasound-CLIP: Semantic-Aware Contrastive Pre-training for Ultrasound Image-Text Understanding: Jiayun Jin,

Haolong Chai,

Xueying Huang,

Xiaoqing Guo,

Zengwei Zheng,

Zhan Zhou,

Junmei Wang,

Xinyu Wang,

Jie Liu,

Binbin Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
  author    = {Jin, Jiayun and Chai, Haolong and Huang, Xueying and Guo, Xiaoqing and Zheng, Zengwei and Zhou, Zhan and Wang, Junmei and Wang, Xinyu and Liu, Jie and Zhou, Binbin},
  title     = {{Ultrasound-CLIP}: Semantic-Aware Contrastive Pre-training for Ultrasound Image-Text Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6962--6971},
}
ReHyAt: Recurrent Hybrid Attention for Video Diffusion Transformers: Mohsen Ghafoorian,

Amirhossein Habibian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghafoorian_2026_CVPR,
  author    = {Ghafoorian, Mohsen and Habibian, Amirhossein},
  title     = {{ReHyAt}: Recurrent Hybrid Attention for Video Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40674--40684},
}
MultiCrafter: High-Fidelity Multi-Subject Generation via Disentangled Attention and Identity-Aware Preference Alignment: Tao Wu,

Yibo Jiang,

Yehao Lu,

Zhizhong Wang,

Zeyi Huang,

Zequn Qin,

Xi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Tao and Jiang, Yibo and Lu, Yehao and Wang, Zhizhong and Huang, Zeyi and Qin, Zequn and Li, Xi},
  title     = {{MultiCrafter}: High-Fidelity Multi-Subject Generation via Disentangled Attention and Identity-Aware Preference Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36691--36702},
}
Camouflage-aware Image-Text Retrieval via Expert Collaboration: Yao Jiang,

Zhongkuan Mao,

Xuan Wu,

Keren Fu,

Qijun Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Yao and Mao, Zhongkuan and Wu, Xuan and Fu, Keren and Zhao, Qijun},
  title     = {Camouflage-aware Image-Text Retrieval via Expert Collaboration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23933--23943},
}
Benchmarking PhD-Level Coding in 3D Geometric Computer Vision: Wenyi Li,

Renkai Luo,

Yue Yu,

Huan-ang Gao,

Mingju Gao,

Li Yuan,

Chaoyou Fu,

Hao Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Wenyi and Luo, Renkai and Yu, Yue and Gao, Huan-ang and Gao, Mingju and Yuan, Li and Fu, Chaoyou and Zhao, Hao},
  title     = {Benchmarking {PhD-Level} Coding in {3D} Geometric Computer Vision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30974--30985},
}
Mitigating Error Amplification in Fast Adversarial Training: Mengnan Zhao,

Lihe Zhang,

Bo Wang,

Tianhang Zheng,

Hong Zhong,

Geyong Min; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Mengnan and Zhang, Lihe and Wang, Bo and Zheng, Tianhang and Zhong, Hong and Min, Geyong},
  title     = {Mitigating Error Amplification in Fast Adversarial Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13346--13355},
}
AirSim360: A Panoramic Simulation Platform within Drone View: Xian Ge,

Yuling Pan,

Yuhang Zhang,

Xiang Li,

Weijun Zhang,

Dizhe Zhang,

Zhaoliang Wan,

Xin Lin,

Xiangkai Zhang,

Juntao Liang,

Xiangtai Li,

WenJie Jiang,

Bo Du,

Ming-Hsuan Yang,

Lu Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2026_CVPR,
  author    = {Ge, Xian and Pan, Yuling and Zhang, Yuhang and Li, Xiang and Zhang, Weijun and Zhang, Dizhe and Wan, Zhaoliang and Lin, Xin and Zhang, Xiangkai and Liang, Juntao and Li, Xiangtai and Jiang, WenJie and Du, Bo and Yang, Ming-Hsuan and Qi, Lu},
  title     = {{AirSim360}: A Panoramic Simulation Platform within Drone View},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26931--26940},
}
HP-Edit: A Human-Preference Post-Training Framework for Image Editing: Fan Li,

Chonghuinan Wang,

Lina Lei,

Yuping Qiu,

Jiaqi Xu,

Jiaxiu Jiang,

Xinran Qin,

Zhikai Chen,

Fenglong Song,

Zhixin Wang,

Renjing Pei,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Fan and Wang, Chonghuinan and Lei, Lina and Qiu, Yuping and Xu, Jiaqi and Jiang, Jiaxiu and Qin, Xinran and Chen, Zhikai and Song, Fenglong and Wang, Zhixin and Pei, Renjing and Zuo, Wangmeng},
  title     = {{HP-Edit}: A Human-Preference Post-Training Framework for Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43113--43123},
}
SocialNav: Training Human-Inspired Foundation Model for Socially-Aware Embodied Navigation: Ziyi Chen,

Yingnan Guo,

Zedong Chu,

Minghua Luo,

Yanfen Shen,

Mingchao Sun,

Junjun Hu,

Shichao Xie,

Yang Kuan,

Pei Shi,

Zhining Gu,

Lu Liu,

Honglin Han,

Xiaolong Wu,

Mu Xu,

Yu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Ziyi and Guo, Yingnan and Chu, Zedong and Luo, Minghua and Shen, Yanfen and Sun, Mingchao and Hu, Junjun and Xie, Shichao and Kuan, Yang and Shi, Pei and Gu, Zhining and Liu, Lu and Han, Honglin and Wu, Xiaolong and Xu, Mu and Zhang, Yu},
  title     = {{SocialNav}: Training Human-Inspired Foundation Model for Socially-Aware Embodied Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28796--28806},
}
Rethinking Position Embedding as a Context Controller for Multi-Reference and Multi-Shot Video Generation: Binyuan Huang,

Yuning Lu,

Weinan Jia,

Hualiang Wang,

Mu Liu,

Daiqing Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Binyuan and Lu, Yuning and Jia, Weinan and Wang, Hualiang and Liu, Mu and Yang, Daiqing},
  title     = {Rethinking Position Embedding as a Context Controller for Multi-Reference and Multi-Shot Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23304--23313},
}
Complet4R: Geometric Complete 4D Reconstruction: Weibang Wang,

Kenan Li,

Zhuoguang Chen,

Yijun Yuan,

Hang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Weibang and Li, Kenan and Chen, Zhuoguang and Yuan, Yijun and Zhao, Hang},
  title     = {{Complet4R}: Geometric Complete {4D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {341--351},
}
MSRL: Scaling Generative Multimodal Reward Modeling via Multi-Stage Reinforcement Learning: Chenglong Wang,

Yifu Huo,

Yang Gan,

Qiaozhi He,

Qi Meng,

Bei Li,

Yan Wang,

Junfu Liu,

Tianhua Zhou,

Jingbo Zhu,

Tong Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chenglong and Huo, Yifu and Gan, Yang and He, Qiaozhi and Meng, Qi and Li, Bei and Wang, Yan and Liu, Junfu and Zhou, Tianhua and Zhu, Jingbo and Xiao, Tong},
  title     = {{MSRL}: Scaling Generative Multimodal Reward Modeling via Multi-Stage Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29410--29420},
}
PixelRush: Ultra-Fast, Training-Free High-Resolution Image Generation via One-step Diffusion: Hong-Phuc Lai,

Phong Nguyen,

Anh Tran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Hong-Phuc and Nguyen, Phong and Tran, Anh},
  title     = {{PixelRush}: Ultra-Fast, Training-Free High-Resolution Image Generation via One-step Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35946--35955},
}
ShadowDraw: From Any Object to Shadow-Drawing Compositional Art: Rundong Luo,

Noah Snavely,

Wei-Chiu Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Rundong and Snavely, Noah and Ma, Wei-Chiu},
  title     = {{ShadowDraw}: From Any Object to Shadow-Drawing Compositional Art},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24428--24437},
}
Cross-Scale Pansharpening via ScaleFormer and the PanScale Benchmark: Ke Cao,

Xuanhua He,

Xueheng Li,

Lingting Zhu,

Yingying Wang,

Ao Ma,

Zhanjie Zhang,

Man Zhou,

Chengjun Xie,

Jie Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Ke and He, Xuanhua and Li, Xueheng and Zhu, Lingting and Wang, Yingying and Ma, Ao and Zhang, Zhanjie and Zhou, Man and Xie, Chengjun and Zhang, Jie},
  title     = {Cross-Scale Pansharpening via {ScaleFormer} and the {PanScale} Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13211--13221},
}
Camera Control for Text-to-Image Generation via Learning Viewpoint Tokens: Xinxuan Lu,

Charless Fowlkes,

Alexander C. Berg; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Xinxuan and Fowlkes, Charless and Berg, Alexander C.},
  title     = {Camera Control for Text-to-Image Generation via Learning Viewpoint Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29222--29232},
}
MatLat: Material Latent Space for PBR Texture Generation: Kyeongmin Yeo,

Yunhong Min,

Jaihoon Kim,

Minhyuk Sung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yeo_2026_CVPR,
  author    = {Yeo, Kyeongmin and Min, Yunhong and Kim, Jaihoon and Sung, Minhyuk},
  title     = {{MatLat}: Material Latent Space for {PBR} Texture Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4602--4612},
}
LoG3D: Ultra-High-Resolution 3D Shape Modeling via Local-to-Global Partitioning: Xinran Yang,

Shuichang Lai,

Jiangjing Lyu,

Hongjie Li,

Bowen Pan,

Yuanqi Li,

Jie Guo,

Zhengkang Zhou,

Yanwen Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xinran and Lai, Shuichang and Lyu, Jiangjing and Li, Hongjie and Pan, Bowen and Li, Yuanqi and Guo, Jie and Zhou, Zhengkang and Guo, Yanwen},
  title     = {{LoG3D}: Ultra-High-Resolution {3D} Shape Modeling via Local-to-Global Partitioning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5945--5955},
}
It's Never Too Late: Noise Optimization for Collapse Recovery in Trained Diffusion Models: Anne Harrington,

A. Sophia Koepke,

Shyamgopal Karthik,

Trevor Darrell,

Alexei A. Efros; [pdf] [supp]
[bibtex]
@InProceedings{Harrington_2026_CVPR,
  author    = {Harrington, Anne and Koepke, A. Sophia and Karthik, Shyamgopal and Darrell, Trevor and Efros, Alexei A.},
  title     = {It's Never Too Late: Noise Optimization for Collapse Recovery in Trained Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43124--43134},
}
StaR-KVQA: Structured Reasoning Traces for Implicit-Knowledge Visual Question Answering: Zhihao Wen,

Wenkang Wei,

Yuan Fang,

Xingtong Yu,

Hui Zhang,

Weicheng Zhu,

Xin Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Zhihao and Wei, Wenkang and Fang, Yuan and Yu, Xingtong and Zhang, Hui and Zhu, Weicheng and Zhang, Xin},
  title     = {{StaR-KVQA}: Structured Reasoning Traces for Implicit-Knowledge Visual Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5114--5124},
}
A Causal Marriage between VLM and IRM from Understanding to Reasoning: Ziliang Chen,

Tianang Xiao,

Jusheng Zhang,

Yongsen Zheng,

Yang Liu,

Zhao-rong Lai,

Liang Lin; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Ziliang and Xiao, Tianang and Zhang, Jusheng and Zheng, Yongsen and Liu, Yang and Lai, Zhao-rong and Lin, Liang},
  title     = {A Causal Marriage between {VLM} and {IRM} from Understanding to Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4749--4760},
}
CamDirector: Towards Long-Term Coherent Video Trajectory Editing: Kejia Yin,

Zhihao Shi,

Weilin Wan,

Yuhongze Zhou,

Yuanhao Yu,

Xinxin Zuo,

Qiang Sun,

Juwei Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Kejia and Shi, Zhihao and Wan, Weilin and Zhou, Yuhongze and Yu, Yuanhao and Zuo, Xinxin and Sun, Qiang and Lu, Juwei},
  title     = {{CamDirector}: Towards Long-Term Coherent Video Trajectory Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32683--32692},
}
SpiderCam: Low-Power Snapshot Depth from Differential Defocus: Marcos A. Ferreira,

Tianao Li,

John Mamish,

Josiah Hester,

Yaman Sangar,

Qi Guo,

Emma Alexander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ferreira_2026_CVPR,
  author    = {Ferreira, Marcos A. and Li, Tianao and Mamish, John and Hester, Josiah and Sangar, Yaman and Guo, Qi and Alexander, Emma},
  title     = {{SpiderCam}: Low-Power Snapshot Depth from Differential Defocus},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41699--41709},
}
Denoising, Fast and Slow: Difficulty-Aware Adaptive Sampling for Image Generation: Johannes Schusterbauer,

Ming Gui,

Yusong Li,

Pingchuan Ma,

Felix Krause,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schusterbauer_2026_CVPR,
  author    = {Schusterbauer, Johannes and Gui, Ming and Li, Yusong and Ma, Pingchuan and Krause, Felix and Ommer, Bj{\"o}rn},
  title     = {Denoising, Fast and Slow: Difficulty-Aware Adaptive Sampling for Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43260--43270},
}
Revisiting the Necessity of Lengthy Chain-of-Thought in Vision-centric Reasoning Generalization: Yifan Du,

Kun Zhou,

Yingqian Min,

Yue Ling,

Wayne Xin Zhao,

Youbin Wu,

Ji-Rong Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Yifan and Zhou, Kun and Min, Yingqian and Ling, Yue and Zhao, Wayne Xin and Wu, Youbin and Wen, Ji-Rong},
  title     = {Revisiting the Necessity of Lengthy Chain-of-Thought in Vision-centric Reasoning Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12019--12029},
}
Leveraging Multispectral Sensors for Color Correction in Mobile Cameras: Luca Cogo,

Marco Buzzelli,

Simone Bianco,

Javier Vazquez-Corral,

Raimondo Schettini; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cogo_2026_CVPR,
  author    = {Cogo, Luca and Buzzelli, Marco and Bianco, Simone and Vazquez-Corral, Javier and Schettini, Raimondo},
  title     = {Leveraging Multispectral Sensors for Color Correction in Mobile Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12438--12447},
}
Uni-DAD: Unified Distillation and Adaptation of Diffusion Models for Few-step Few-shot Image Generation: Yara Bahram,

Mélodie Desbos,

Mohammadhadi Shateri,

Eric Granger; [pdf] [arXiv]
[bibtex]
@InProceedings{Bahram_2026_CVPR,
  author    = {Bahram, Yara and Desbos, M{\'e}lodie and Shateri, Mohammadhadi and Granger, Eric},
  title     = {{Uni-DAD}: Unified Distillation and Adaptation of Diffusion Models for Few-step Few-shot Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26635--26645},
}
Real-Time Generation of Streamable Talking Portrait Video with Reference-Guided Deep Compression VAEs: Sicheng Xu,

Yu Deng,

Shoukang Hu,

Yichuan Wang,

Yizhong Zhang,

Zhan Chen,

Jiaolong Yang,

Baining Guo; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Sicheng and Deng, Yu and Hu, Shoukang and Wang, Yichuan and Zhang, Yizhong and Chen, Zhan and Yang, Jiaolong and Guo, Baining},
  title     = {Real-Time Generation of Streamable Talking Portrait Video with Reference-Guided Deep Compression {VAEs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9284--9295},
}
Towards Human-Like Robot Handwriting via Contour-Aware Generation: Yutao Qin,

Gang Dai,

Yifan Zhang,

Youwei Han,

Qisheng He,

Shuangping Huang; [pdf] [supp]
[bibtex]
@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Yutao and Dai, Gang and Zhang, Yifan and Han, Youwei and He, Qisheng and Huang, Shuangping},
  title     = {Towards Human-Like Robot Handwriting via Contour-Aware Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31597--31607},
}
AE2VID: Event-based Video Reconstruction via Aperture Modulation: Chenxu Bai,

Boyu Li,

Peiqi Duan,

Xinyu Zhou,

Hanyue Lou,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Chenxu and Li, Boyu and Duan, Peiqi and Zhou, Xinyu and Lou, Hanyue and Shi, Boxin},
  title     = {{AE2VID}: Event-based Video Reconstruction via Aperture Modulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15115--15124},
}
Anchoring and Rescaling Attention for Semantically Coherent Inbetweening: Tae Eun Choi,

Sumin Shim,

Junhyeok Kim,

Seong Jae Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Tae Eun and Shim, Sumin and Kim, Junhyeok and Hwang, Seong Jae},
  title     = {Anchoring and Rescaling Attention for Semantically Coherent Inbetweening},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8976--8985},
}
UniT: Unified Multimodal Chain-of-Thought Test-time Scaling: Leon Liangyu Chen,

Haoyu Ma,

Zhipeng Fan,

Ziqi Huang,

Animesh Sinha,

Xiaoliang Dai,

Jialiang Wang,

Zecheng He,

Jianwei Yang,

Chunyuan Li,

Junzhe Sun,

Chu Wang,

Serena Yeung-Levy,

Felix Juefei-Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Leon Liangyu and Ma, Haoyu and Fan, Zhipeng and Huang, Ziqi and Sinha, Animesh and Dai, Xiaoliang and Wang, Jialiang and He, Zecheng and Yang, Jianwei and Li, Chunyuan and Sun, Junzhe and Wang, Chu and Yeung-Levy, Serena and Juefei-Xu, Felix},
  title     = {{UniT}: Unified Multimodal Chain-of-Thought Test-time Scaling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30257--30267},
}
Verifying Neural Network Robustness with Dual Perturbations: Hai Duong,

Lam Nguyen,

Thanh Le,

ThanhVu Nguyen; [pdf] [supp]
[bibtex]
@InProceedings{Duong_2026_CVPR,
  author    = {Duong, Hai and Nguyen, Lam and Le, Thanh and Nguyen, ThanhVu},
  title     = {Verifying Neural Network Robustness with Dual Perturbations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27916--27925},
}
VoxTell: Free-Text Promptable Universal 3D Medical Image Segmentation: Maximilian Rokuss,

Moritz Langenberg,

Yannick Kirchhoff,

Fabian Isensee,

Benjamin Hamm,

Constantin Ulrich,

Sebastian Regnery,

Lukas Bauer,

Efthimios Katsigiannopulos,

Tobias Norajitra,

Klaus Maier-Hein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rokuss_2026_CVPR,
  author    = {Rokuss, Maximilian and Langenberg, Moritz and Kirchhoff, Yannick and Isensee, Fabian and Hamm, Benjamin and Ulrich, Constantin and Regnery, Sebastian and Bauer, Lukas and Katsigiannopulos, Efthimios and Norajitra, Tobias and Maier-Hein, Klaus},
  title     = {{VoxTell}: Free-Text Promptable Universal {3D} Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37538--37557},
}
Beyond Graph Model: Reliable VLM Fine-Tuning via Random Graph Adapter: Bo Jiang,

Xueyang Ze,

Beibei Wang,

Xixi Wang,

Xixi Wan,

Bin Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Bo and Ze, Xueyang and Wang, Beibei and Wang, Xixi and Wan, Xixi and Luo, Bin},
  title     = {Beyond Graph Model: Reliable {VLM} Fine-Tuning via Random Graph Adapter},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11664--11673},
}
Event-Illumination Collaborative Low-light Image Enhancement with a High-resolution Real-world Dataset: Senyan Xu,

Zhijing Sun,

Kean Liu,

Xin Lu,

Ruixuan Jiang,

Xueyang Fu,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Senyan and Sun, Zhijing and Liu, Kean and Lu, Xin and Jiang, Ruixuan and Fu, Xueyang and Zha, Zheng-Jun},
  title     = {Event-Illumination Collaborative Low-light Image Enhancement with a High-resolution Real-world Dataset},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22270--22280},
}
VGent: Visual Grounding via Modular Design for Disentangling Reasoning and Prediction: Weitai Kang,

Jason Kuen,

Mengwei Ren,

Zijun Wei,

Yan Yan,

Kangning Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Weitai and Kuen, Jason and Ren, Mengwei and Wei, Zijun and Yan, Yan and Liu, Kangning},
  title     = {{VGent}: Visual Grounding via Modular Design for Disentangling Reasoning and Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41160--41170},
}
HulluEdit: Single-Pass Evidence-Consistent Subspace Editing for Mitigating Hallucinations in Large Vision-Language Models: Yangguang Lin,

Quan Fang,

Yufei Li,

Jiachen Sun,

Junyu Gao,

Jitao Sang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Yangguang and Fang, Quan and Li, Yufei and Sun, Jiachen and Gao, Junyu and Sang, Jitao},
  title     = {{HulluEdit}: Single-Pass Evidence-Consistent Subspace Editing for Mitigating Hallucinations in Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11086--11095},
}
Probabilistic Prompt Adaptation for Unified Image Aesthetics and Quality Assessment: Takayuki Hara,

Yuya Otsuka; [pdf] [supp]
[bibtex]
@InProceedings{Hara_2026_CVPR,
  author    = {Hara, Takayuki and Otsuka, Yuya},
  title     = {Probabilistic Prompt Adaptation for Unified Image Aesthetics and Quality Assessment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37236--37246},
}
LiveGesture: Streamable Co-Speech Gesture Generation Model: Muhammad Usama Saleem,

Mayur Jagdishbhai Patel,

Ekkasit Pinyoanuntapong,

Zhongxing Qin,

Li Yang,

Hongfei Xue,

Ahmed Helmy,

Chen Chen,

Pu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saleem_2026_CVPR,
  author    = {Saleem, Muhammad Usama and Patel, Mayur Jagdishbhai and Pinyoanuntapong, Ekkasit and Qin, Zhongxing and Yang, Li and Xue, Hongfei and Helmy, Ahmed and Chen, Chen and Wang, Pu},
  title     = {{LiveGesture}: Streamable Co-Speech Gesture Generation Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2264--2273},
}
PercHead: Perceptual Head Model for Single-Image 3D Head Reconstruction & Editing: Antonio Oroz,

Matthias Nießner,

Tobias Kirschstein; [pdf] [supp]
[bibtex]
@InProceedings{Oroz_2026_CVPR,
  author    = {Oroz, Antonio and Nie{\ss}ner, Matthias and Kirschstein, Tobias},
  title     = {{PercHead}: Perceptual Head Model for Single-Image {3D} Head Reconstruction \& Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4097--4108},
}
Defending Unauthorized Model Merging via Dual-Stage Weight Protection: Wei-Jia Chen,

Min-Yan Tsai,

Cheng-Yi Lee,

Chia-Mu Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Wei-Jia and Tsai, Min-Yan and Lee, Cheng-Yi and Yu, Chia-Mu},
  title     = {Defending Unauthorized Model Merging via Dual-Stage Weight Protection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27926--27935},
}
SMAP: Semantic Route Planning with Map-Grounded Multimodal Alignment: Wenjie Zhang,

Chen Yang,

Xin Lu,

Zhen Wang,

Yue Liu,

Bobo Xi,

Pengbo Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Wenjie and Yang, Chen and Lu, Xin and Wang, Zhen and Liu, Yue and Xi, Bobo and Zhang, Pengbo},
  title     = {{SMAP}: Semantic Route Planning with Map-Grounded Multimodal Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40108--40118},
}
ActivityForensics: A Comprehensive Benchmark for Localizing Manipulated Activity in Videos: Peijun Bao,

Anwei Luo,

Gang Pan,

Alex C. Kot,

Xudong Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bao_2026_CVPR,
  author    = {Bao, Peijun and Luo, Anwei and Pan, Gang and Kot, Alex C. and Jiang, Xudong},
  title     = {{ActivityForensics}: A Comprehensive Benchmark for Localizing Manipulated Activity in Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42987--42996},
}
Affine Perspective-Three-Point Problem: Gaku Nakano; [pdf] [supp]
[bibtex]
@InProceedings{Nakano_2026_CVPR,
  author    = {Nakano, Gaku},
  title     = {Affine Perspective-Three-Point Problem},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12217--12226},
}
Refacade: Editing Object with Given Reference Texture: Youze Huang,

Penghui Ruan,

Bojia Zi,

Xianbiao Qi,

Jianan Wang,

Rong Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Youze and Ruan, Penghui and Zi, Bojia and Qi, Xianbiao and Wang, Jianan and Xiao, Rong},
  title     = {Refacade: Editing Object with Given Reference Texture},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1961--1972},
}
Fine-grained Image Aesthetic Assessment: Learning Discriminative Scores from Relative Ranks: Zhichao Yang,

Jianjie Wang,

Zhixianhe Zhang,

Pangu Xie,

Xiangfei Sheng,

Pengfei Chen,

Leida Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Zhichao and Wang, Jianjie and Zhang, Zhixianhe and Xie, Pangu and Sheng, Xiangfei and Chen, Pengfei and Li, Leida},
    title     = {Fine-grained Image Aesthetic Assessment: Learning Discriminative Scores from Relative Ranks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {145--155}
}
ThinkGen: Generalized Thinking for Visual Generation: Siyu Jiao,

Yiheng Lin,

Yujie Zhong,

Qi She,

Wei Zhou,

Xiaohan Lan,

Zilong Huang,

Fei Yu,

Yingchen Yu,

Yunqing Zhao,

Yao Zhao,

Yunchao Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiao_2026_CVPR,
    author    = {Jiao, Siyu and Lin, Yiheng and Zhong, Yujie and She, Qi and Zhou, Wei and Lan, Xiaohan and Huang, Zilong and Yu, Fei and Yu, Yingchen and Zhao, Yunqing and Zhao, Yao and Wei, Yunchao},
    title     = {{ThinkGen}: Generalized Thinking for Visual Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14713--14723}
}
Mitigating The Distribution Shift of Diffusion-based Dataset Distillation: Yue Xu,

Chenyu Hu,

Pengyu An,

Yong-Lu Li; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Yue and Hu, Chenyu and An, Pengyu and Li, Yong-Lu},
    title     = {Mitigating The Distribution Shift of Diffusion-based Dataset Distillation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33943--33952}
}
LiteSense: Lifting Lightweight ToF with RGB for High-Resolution Metric Depth Estimation: Yusheng Li,

Lizhi Lou,

Yan Tang,

Zekai Miao,

Shaoming Zhang,

Jianmei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Yusheng and Lou, Lizhi and Tang, Yan and Miao, Zekai and Zhang, Shaoming and Wang, Jianmei},
    title     = {{LiteSense}: Lifting Lightweight {ToF} with {RGB} for High-Resolution Metric Depth Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {5783--5792}
}
MoEActok: A MoE-based Action Tokenizer for Vision-Language-Action Models: Chunpu Xu,

Zhixuan Liang,

Tianshuo Yang,

Chi-Min Chan,

Yang Xiao,

Jessie Wang,

Xiaokang Yang,

Yao Mu; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Chunpu and Liang, Zhixuan and Yang, Tianshuo and Chan, Chi-Min and Xiao, Yang and Wang, Jessie and Yang, Xiaokang and Mu, Yao},
    title     = {{MoEActok}: A {MoE}-based Action Tokenizer for Vision-Language-Action Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28042--28051}
}
COG: Confidence-aware Optimal Geometric Correspondence for Unsupervised Single-reference Novel Object Pose Estimation: Yuchen Che,

Jingtu Wu,

Hao Zheng,

Asako Kanezaki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Che_2026_CVPR,
    author    = {Che, Yuchen and Wu, Jingtu and Zheng, Hao and Kanezaki, Asako},
    title     = {{COG}: Confidence-aware Optimal Geometric Correspondence for Unsupervised Single-reference Novel Object Pose Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11567--11578}
}
Generative Video Compression with One-Dimensional Latent Representation: Zihan Zheng,

Zhaoyang Jia,

Naifu Xue,

Jiahao Li,

Bin Li,

Zongyu Guo,

Xiaoyi Zhang,

Zhenghao Chen,

Houqiang Li,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
    author    = {Zheng, Zihan and Jia, Zhaoyang and Xue, Naifu and Li, Jiahao and Li, Bin and Guo, Zongyu and Zhang, Xiaoyi and Chen, Zhenghao and Li, Houqiang and Lu, Yan},
    title     = {Generative Video Compression with One-Dimensional Latent Representation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41256--41265}
}
ExPose: Reinforcing Video Generation Models for Extreme Pose Estimation: Youngho Yoon,

Wonjune Cho,

Hyunho Ha,

Sujung Kim,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Yoon_2026_CVPR,
    author    = {Yoon, Youngho and Cho, Wonjune and Ha, Hyunho and Kim, Sujung and Yoon, Kuk-Jin},
    title     = {{ExPose}: Reinforcing Video Generation Models for Extreme Pose Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32636--32646}
}
ORCA: Orchestrated Reasoning with Collaborative Agents for Document Visual Question Answering: Aymen Lassoued,

Mohamed Ali Souibgui,

Yousri Kessentini; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lassoued_2026_CVPR,
    author    = {Lassoued, Aymen and Souibgui, Mohamed Ali and Kessentini, Yousri},
    title     = {{ORCA}: Orchestrated Reasoning with Collaborative Agents for Document Visual Question Answering},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19475--19486}
}
Improving Calibration in Test-Time Prompt Tuning for Vision-Language Models via Data-Free Flatness-Aware Prompt Pretraining: Hyeonseo Jang,

Jaebyeong Jeon,

Joong-Won Hwang,

Kibok Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2026_CVPR,
    author    = {Jang, Hyeonseo and Jeon, Jaebyeong and Hwang, Joong-Won and Lee, Kibok},
    title     = {Improving Calibration in Test-Time Prompt Tuning for Vision-Language Models via Data-Free Flatness-Aware Prompt Pretraining},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24300--24309}
}
Multi-SpatialMLLM: Multi-Frame Spatial Understanding with Multi-Modal Large Language Models: Runsen Xu,

Weiyao Wang,

Hao Tang,

Xingyu Chen,

Xiaodong Wang,

Fu-Jen Chu,

Matt Feiszli,

Kevin J. Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Runsen and Wang, Weiyao and Tang, Hao and Chen, Xingyu and Wang, Xiaodong and Chu, Fu-Jen and Feiszli, Matt and Liang, Kevin J.},
    title     = {{Multi-SpatialMLLM}: Multi-Frame Spatial Understanding with Multi-Modal Large Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31078--31088}
}
Cross-Domain Demo-to-Code via Neurosymbolic Counterfactual Reasoning: Jooyoung Kim,

Wonje Choi,

Younguk Song,

Honguk Woo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
    author    = {Kim, Jooyoung and Choi, Wonje and Song, Younguk and Woo, Honguk},
    title     = {Cross-Domain Demo-to-Code via Neurosymbolic Counterfactual Reasoning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18848--18858}
}
Aligning What Vision-Language Models See and Perceive with Adaptive Information Flow: Chengxin Liu,

Wonseok Choi,

Chenshuang Zhang,

Tae-Hyun Oh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Chengxin and Choi, Wonseok and Zhang, Chenshuang and Oh, Tae-Hyun},
    title     = {Aligning What Vision-Language Models See and Perceive with Adaptive Information Flow},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24706--24715}
}
Long-Tail Internet Photo Reconstruction: Yuan Li,

Yuanbo Xiangli,

Hadar Averbuch-Elor,

Noah Snavely,

Ruojin Cai; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Yuan and Xiangli, Yuanbo and Averbuch-Elor, Hadar and Snavely, Noah and Cai, Ruojin},
    title     = {Long-Tail Internet Photo Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {417--426}
}
DocSeeker: Structured Visual Reasoning with Evidence Grounding for Long Document Understanding: Hao Yan,

Yuliang Liu,

Xingchen Liu,

Yuyi Zhang,

Minghui Liao,

Jihao Wu,

Wei Chen,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
    author    = {Yan, Hao and Liu, Yuliang and Liu, Xingchen and Zhang, Yuyi and Liao, Minghui and Wu, Jihao and Chen, Wei and Bai, Xiang},
    title     = {{DocSeeker}: Structured Visual Reasoning with Evidence Grounding for Long Document Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41140--41149}
}
DriverGaze360: OmniDirectional Driver Attention with Object-Level Guidance: Shreedhar Govil,

Didier Stricker,

Jason Rambach; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Govil_2026_CVPR,
    author    = {Govil, Shreedhar and Stricker, Didier and Rambach, Jason},
    title     = {{DriverGaze360}: {OmniDirectional} Driver Attention with Object-Level Guidance},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39786--39795}
}
FlashIn: Fast and Accurate Image Inversion for Real-time Image Editing: Guangzhi Wang; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Guangzhi},
    title     = {{FlashIn}: Fast and Accurate Image Inversion for Real-time Image Editing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30425--30434}
}
Write Where It Matters: Policy-Guided Watermarks for 3D Gaussian Splatting: Nan Li,

Yike Zeng,

Qian Zhang,

Qi Zhang,

Zhiyi Pan,

Wei Feng,

Liang Wan; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Nan and Zeng, Yike and Zhang, Qian and Zhang, Qi and Pan, Zhiyi and Feng, Wei and Wan, Liang},
    title     = {Write Where It Matters: Policy-Guided Watermarks for {3D} {Gaussian} Splatting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {6580--6590}
}
Symphony: A Cognitively-Inspired Multi-Agent System for Long-Video Understanding: Haiyang Yan,

Hongyun Zhou,

Peng Xu,

Xiaoxue Feng,

Mengyi Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
    author    = {Yan, Haiyang and Zhou, Hongyun and Xu, Peng and Feng, Xiaoxue and Liu, Mengyi},
    title     = {Symphony: A Cognitively-Inspired Multi-Agent System for Long-Video Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24031--24041}
}
WikiCLIP: An Efficient Contrastive Baseline for Open-domain Visual Entity Recognition: Shan Ning,

Longtian Qiu,

Jiaxuan Sun,

Xuming He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ning_2026_CVPR,
    author    = {Ning, Shan and Qiu, Longtian and Sun, Jiaxuan and He, Xuming},
    title     = {{WikiCLIP}: An Efficient Contrastive Baseline for Open-domain Visual Entity Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1596--1605}
}
Gloria: Consistent Character Video Generation via Content Anchors: Yuhang Yang,

Fan Zhang,

Huaijin Pi,

Ailing Zeng,

Shuai Guo,

Guowei Xu,

Wei Zhai,

Yang Cao,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Yuhang and Zhang, Fan and Pi, Huaijin and Zeng, Ailing and Guo, Shuai and Xu, Guowei and Zhai, Wei and Cao, Yang and Zha, Zheng-Jun},
    title     = {Gloria: Consistent Character Video Generation via Content Anchors},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36724--36735}
}
MoCha: End-to-End Video Character Replacement without Structural Guidance: Zhengbo Xu,

Jie Ma,

Ziheng Wang,

Zhan Peng,

Jun Liang,

Jing Li; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Zhengbo and Ma, Jie and Wang, Ziheng and Peng, Zhan and Liang, Jun and Li, Jing},
    title     = {{MoCha}: End-to-End Video Character Replacement without Structural Guidance},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16279--16288}
}
UniVBench: Towards Unified Evaluation for Video Foundation Models: Jianhui Wei,

Xiaotian Zhang,

Yichen Li,

Yuan Wang,

Yan Zhang,

Ziyi Chen,

Zhihang Tang,

Wei Xu,

Zuozhu Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
    author    = {Wei, Jianhui and Zhang, Xiaotian and Li, Yichen and Wang, Yuan and Zhang, Yan and Chen, Ziyi and Tang, Zhihang and Xu, Wei and Liu, Zuozhu},
    title     = {{UniVBench}: Towards Unified Evaluation for Video Foundation Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25654--25666}
}
CoVFT: Context-aware Visual Fine-tuning for Multimodal Large Language Models: Nan Zhou,

Huiqun Wang,

Yaoyan Zheng,

Di Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Nan and Wang, Huiqun and Zheng, Yaoyan and Huang, Di},
    title     = {{CoVFT}: Context-aware Visual Fine-tuning for Multimodal Large Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24341--24351}
}
RaPA: Enhancing Transferable Targeted Attacks via Random Parameter Pruning: Tongrui Su,

Qingbin Li,

Shengyu Zhu,

Wei Chen,

Xueqi Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2026_CVPR,
    author    = {Su, Tongrui and Li, Qingbin and Zhu, Shengyu and Chen, Wei and Cheng, Xueqi},
    title     = {{RaPA}: Enhancing Transferable Targeted Attacks via Random Parameter Pruning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {6538--6548}
}
Faster-GS: Analyzing and Improving Gaussian Splatting Optimization: Florian Hahlbohm,

Linus Franke,

Martin Eisemann,

Marcus Magnor; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hahlbohm_2026_CVPR,
    author    = {Hahlbohm, Florian and Franke, Linus and Eisemann, Martin and Magnor, Marcus},
    title     = {{Faster-GS}: Analyzing and Improving {Gaussian} Splatting Optimization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18946--18957}
}
X-band Radar Non-Line-of-Sight Imaging: Dongyu Du,

Mingkun Zhao,

Yutong Yang,

Dominik Scheuble,

Xiaolong Huang,

Zijian Shao,

Mario Bijelic,

Kaushik Sengupta,

Felix Heide; [pdf] [supp]
[bibtex]
@InProceedings{Du_2026_CVPR,
    author    = {Du, Dongyu and Zhao, Mingkun and Yang, Yutong and Scheuble, Dominik and Huang, Xiaolong and Shao, Zijian and Bijelic, Mario and Sengupta, Kaushik and Heide, Felix},
    title     = {X-band Radar Non-Line-of-Sight Imaging},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {5647--5658}
}
DiverseGRPO: Mitigating Mode Collapse in Image Generation via Diversity-Aware GRPO: Henglin Liu,

Huijuan Huang,

Jing Wang,

Chang Liu,

Xiu Li,

Xiangyang Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Henglin and Huang, Huijuan and Wang, Jing and Liu, Chang and Li, Xiu and Ji, Xiangyang},
    title     = {{DiverseGRPO}: Mitigating Mode Collapse in Image Generation via Diversity-Aware {GRPO}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1864--1873}
}
Breaking Smooth-Motion Assumptions: A UAV Benchmark for Multi-Object Tracking in Complex and Adverse Conditions: Jingtao Ye,

Kexin Zhang,

Xunchi Ma,

Yuechan Li,

Guangming Zhu,

Peiyi Shen,

Linhua Jiang,

Xiangdong Zhang,

Liang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
    author    = {Ye, Jingtao and Zhang, Kexin and Ma, Xunchi and Li, Yuechan and Zhu, Guangming and Shen, Peiyi and Jiang, Linhua and Zhang, Xiangdong and Zhang, Liang},
    title     = {Breaking Smooth-Motion Assumptions: A {UAV} Benchmark for Multi-Object Tracking in Complex and Adverse Conditions},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13594--13603}
}
PROMO: Promptable Outfitting for Efficient High-Fidelity Virtual Try-On: Haohua Chen,

Tianze Zhou,

Wei Zhu,

Runqi Wang,

Yandong Guan,

Dejia Song,

Yibo Chen,

Xu Tang,

Yao Hu,

Lu Sheng,

Zhiyong Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Haohua and Zhou, Tianze and Zhu, Wei and Wang, Runqi and Guan, Yandong and Song, Dejia and Chen, Yibo and Tang, Xu and Hu, Yao and Sheng, Lu and Wu, Zhiyong},
    title     = {{PROMO}: Promptable Outfitting for Efficient High-Fidelity Virtual Try-On},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16074--16084}
}
Action-Geometry Prediction with 3D Geometric Prior for Bimanual Manipulation: Chongyang Xu,

Haipeng Li,

Shen Cheng,

Haoqiang Fan,

Ziliang Feng,

Shuaicheng Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Chongyang and Li, Haipeng and Cheng, Shen and Fan, Haoqiang and Feng, Ziliang and Liu, Shuaicheng},
    title     = {Action-Geometry Prediction with {3D} Geometric Prior for Bimanual Manipulation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35036--35046}
}
Unleashing the Intrinsic Visual Representation Capability of Multimodal Large Language Models: Hengzhuang Li,

Xinsong Zhang,

Qiming Peng,

Bin Luo,

Han Hu,

Dengyang Jiang,

Han-Jia Ye,

Teng Zhang,

Hai Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Hengzhuang and Zhang, Xinsong and Peng, Qiming and Luo, Bin and Hu, Han and Jiang, Dengyang and Ye, Han-Jia and Zhang, Teng and Jin, Hai},
    title     = {Unleashing the Intrinsic Visual Representation Capability of Multimodal Large Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1771--1786}
}
Few-shot Acoustic Synthesis with Multimodal Flow Matching: Amandine Brunetto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Brunetto_2026_CVPR,
    author    = {Brunetto, Amandine},
    title     = {Few-shot Acoustic Synthesis with Multimodal Flow Matching},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15773--15783}
}
MotionEdit: Benchmarking and Learning Motion-Centric Image Editing: Yixin Wan,

Lei Ke,

Wenhao Yu,

Kai-Wei Chang,

Dong Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wan_2026_CVPR,
    author    = {Wan, Yixin and Ke, Lei and Yu, Wenhao and Chang, Kai-Wei and Yu, Dong},
    title     = {{MotionEdit}: Benchmarking and Learning Motion-Centric Image Editing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {9263--9272}
}
Revisiting 2D Foundation Models for Scalable 3D Medical Image Classification: Han Liu,

Bogdan Georgescu,

Yanbo Zhang,

Youngjin Yoo,

Michael Baumgartner,

Riqiang Gao,

Jianing Wang,

Gengyan Zhao,

Eli Gibson,

Dorin Comaniciu,

Sasa Grbic; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Han and Georgescu, Bogdan and Zhang, Yanbo and Yoo, Youngjin and Baumgartner, Michael and Gao, Riqiang and Wang, Jianing and Zhao, Gengyan and Gibson, Eli and Comaniciu, Dorin and Grbic, Sasa},
    title     = {Revisiting {2D} Foundation Models for Scalable {3D} Medical Image Classification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30021--30031}
}
OMG-Avatar: One-shot Multi-LOD Gaussian Head Avatar: Jianqiang Ren,

Lin Liu,

Steven Hoi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR,
    author    = {Ren, Jianqiang and Liu, Lin and Hoi, Steven},
    title     = {{OMG-Avatar}: One-shot {Multi-LOD} {Gaussian} Head Avatar},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11017--11028}
}
UniGen-1.5: Enhancing Image Generation and Editing through Reward Unification in RL: Rui Tian,

Mingfei Gao,

Haiming Gang,

Jiasen Lu,

Zhe Gan,

Yinfei Yang,

Zuxuan Wu,

Afshin Dehghan; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2026_CVPR,
    author    = {Tian, Rui and Gao, Mingfei and Gang, Haiming and Lu, Jiasen and Gan, Zhe and Yang, Yinfei and Wu, Zuxuan and Dehghan, Afshin},
    title     = {{UniGen-1.5}: Enhancing Image Generation and Editing through Reward Unification in {RL}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29367--29378}
}
Reflection Separation from a Single Image via Joint Latent Diffusion: Zheng-Hui Huang,

Zhixiang Wang,

Yu-Lun Liu,

Yung-Yu Chuang; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Zheng-Hui and Wang, Zhixiang and Liu, Yu-Lun and Chuang, Yung-Yu},
    title     = {Reflection Separation from a Single Image via Joint Latent Diffusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {4569--4579}
}
Anatomica: Localized Control over Geometric and Topological Properties for Anatomical Diffusion Models: Karim Kadry,

Abdalla Abdelwahed,

Ajay Manicka,

Naravich Chutisilp,

Farhad R. Nezami,

Elazer R. Edelman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kadry_2026_CVPR,
    author    = {Kadry, Karim and Abdelwahed, Abdalla and Manicka, Ajay and Chutisilp, Naravich and Nezami, Farhad R. and Edelman, Elazer R.},
    title     = {Anatomica: Localized Control over Geometric and Topological Properties for Anatomical Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15595--15605}
}
Leveraging Verifier-Based Reinforcement Learning in Image Editing: Hanzhong Guo,

Jie Wu,

Jie Liu,

Yu Gao,

Zilyu Ye,

Linxiao Yuan,

Xionghui Wang,

Yizhou Yu,

Weilin Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
    author    = {Guo, Hanzhong and Wu, Jie and Liu, Jie and Gao, Yu and Ye, Zilyu and Yuan, Linxiao and Wang, Xionghui and Yu, Yizhou and Huang, Weilin},
    title     = {Leveraging Verifier-Based Reinforcement Learning in Image Editing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34343--34352}
}
UniCompress: Token Compression for Unified Vision-Language Understanding and Generation: Ziyao Wang,

Chen Chen,

Jingtao Li,

Weiming Zhuang,

Jiabo Huang,

Ang Li,

Lingjuan Lyu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Ziyao and Chen, Chen and Li, Jingtao and Zhuang, Weiming and Huang, Jiabo and Li, Ang and Lyu, Lingjuan},
    title     = {{UniCompress}: Token Compression for Unified Vision-Language Understanding and Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24663--24674}
}
Imbalanced View Contribution Evaluation and Refinement for Deep Incomplete Multi-View Clustering: Taichun Zhou,

Zhibin Dong,

Hao Tan,

Siwei Wang,

Xinwang Liu,

En Zhu,

Di Hu,

Tianrui Liu,

Chuankun Li,

Kunlun He; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Taichun and Dong, Zhibin and Tan, Hao and Wang, Siwei and Liu, Xinwang and Zhu, En and Hu, Di and Liu, Tianrui and Li, Chuankun and He, Kunlun},
    title     = {Imbalanced View Contribution Evaluation and Refinement for Deep Incomplete Multi-View Clustering},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39606--39616}
}
MooCap: A Multi-View Benchmark for Cow-Object-Human Interaction and Behavior Dynamics: Ian Noronha,

Heather Neave,

Upinder Kaur; [pdf] [supp]
[bibtex]
@InProceedings{Noronha_2026_CVPR,
    author    = {Noronha, Ian and Neave, Heather and Kaur, Upinder},
    title     = {{MooCap}: A Multi-View Benchmark for Cow-Object-Human Interaction and Behavior Dynamics},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {27324--27333}
}
InnoAds-Composer: Efficient Condition Composition for E-Commerce Poster Generation: Yuxin Qin,

Ke Cao,

Haowei Liu,

Ao Ma,

Fengheng Li,

Honghe Zhu,

Zheng Zhang,

Run Ling,

Wei Feng,

Xuanhua He,

Zhanjie Zhang,

Zhen Guo,

Haoyi Bian,

Jingjing Lv,

Junjie Shen,

Ching Law; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2026_CVPR,
    author    = {Qin, Yuxin and Cao, Ke and Liu, Haowei and Ma, Ao and Li, Fengheng and Zhu, Honghe and Zhang, Zheng and Ling, Run and Feng, Wei and He, Xuanhua and Zhang, Zhanjie and Guo, Zhen and Bian, Haoyi and Lv, Jingjing and Shen, Junjie and Law, Ching},
    title     = {{InnoAds-Composer}: Efficient Condition Composition for E-Commerce Poster Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32988--32999}
}
Envisioning the Future, One Step at a Time: Stefan Andreas Baumann,

Jannik Wiese,

Tommaso Martorella,

Mahdi M. Kalayeh,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Baumann_2026_CVPR,
    author    = {Baumann, Stefan Andreas and Wiese, Jannik and Martorella, Tommaso and Kalayeh, Mahdi M. and Ommer, Bj{\"o}rn},
    title     = {Envisioning the Future, One Step at a Time},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {6823--6836}
}
MeToM: Metadata-Guided Token Merging for Efficient Video LLMs: Zhuojie Wu,

Shijie Wang,

Xin Yu; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Zhuojie and Wang, Shijie and Yu, Xin},
    title     = {{MeToM}: Metadata-Guided Token Merging for Efficient Video {LLMs}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {10441--10450}
}
Breaking Spurious Correlations: Uncertainty-Driven Causal Transformers for AU Detection: Yuru Wang,

Yue Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yuru and Zhou, Yue},
    title     = {Breaking Spurious Correlations: Uncertainty-Driven Causal Transformers for {AU} Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {7165--7174}
}
Progressive Multi-cue Alignment for Unaligned RGBT Tracking: Jiandong Jin,

Chenglong Li,

Hao Feng,

Andong Lu,

Lili Huang,

Jin Tang; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2026_CVPR,
    author    = {Jin, Jiandong and Li, Chenglong and Feng, Hao and Lu, Andong and Huang, Lili and Tang, Jin},
    title     = {Progressive Multi-cue Alignment for Unaligned {RGBT} Tracking},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35207--35216}
}
DynamicVGGT: Learning Dynamic Point Maps for 4D Scene Reconstruction in Autonomous Driving: Zhuolin He,

Jing Li,

Guanghao Li,

Xiaolei Chen,

Jiacheng Tang,

Siyang Zhang,

Zhounan Jin,

Feipeng Cai,

Bin Li,

Jian Pu,

Jia Cai,

Xiangyang Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
    author    = {He, Zhuolin and Li, Jing and Li, Guanghao and Chen, Xiaolei and Tang, Jiacheng and Zhang, Siyang and Jin, Zhounan and Cai, Feipeng and Li, Bin and Pu, Jian and Cai, Jia and Xue, Xiangyang},
    title     = {{DynamicVGGT}: Learning Dynamic Point Maps for {4D} Scene Reconstruction in Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35670--35679}
}
Self-Evaluation Unlocks Any-Step Text-to-Image Generation: Xin Yu,

Xiaojuan Qi,

Zhengqi Li,

Kai Zhang,

Richard Zhang,

Zhe Lin,

Eli Shechtman,

Tianyu Wang,

Yotam Nitzan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
    author    = {Yu, Xin and Qi, Xiaojuan and Li, Zhengqi and Zhang, Kai and Zhang, Richard and Lin, Zhe and Shechtman, Eli and Wang, Tianyu and Nitzan, Yotam},
    title     = {Self-Evaluation Unlocks Any-Step Text-to-Image Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {7816--7826}
}
AURA: Multi-modal Shared Autonomy for Urban Navigation: Yukai Ma,

Honglin He,

Selina Song,

Wayne Wu,

Bolei Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
    author    = {Ma, Yukai and He, Honglin and Song, Selina and Wu, Wayne and Zhou, Bolei},
    title     = {{AURA}: Multi-modal Shared Autonomy for Urban Navigation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18171--18181}
}
LuxRemix: Lighting Decomposition and Remixing for Indoor Scenes: Ruofan Liang,

Norman Müller,

Ethan Weber,

Duncan Zauss,

Nandita Vijaykumar,

Peter Kontschieder,

Christian Richardt; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2026_CVPR,
    author    = {Liang, Ruofan and M{\"u}ller, Norman and Weber, Ethan and Zauss, Duncan and Vijaykumar, Nandita and Kontschieder, Peter and Richardt, Christian},
    title     = {{LuxRemix}: Lighting Decomposition and Remixing for Indoor Scenes},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1100--1111}
}
AVATAR: Reinforcement Learning to See, Hear, and Reason Over Video: Yogesh Kulkarni,

Pooyan Fazli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kulkarni_2026_CVPR,
    author    = {Kulkarni, Yogesh and Fazli, Pooyan},
    title     = {{AVATAR}: Reinforcement Learning to See, Hear, and Reason Over Video},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {7912--7922}
}
Semantics Lead the Way: Harmonizing Semantic and Texture Modeling with Asynchronous Latent Diffusion: Yueming Pan,

Ruoyu Feng,

Qi Dai,

Yuqi Wang,

Wenfeng Lin,

Mingyu Guo,

Chong Luo,

Nanning Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
    author    = {Pan, Yueming and Feng, Ruoyu and Dai, Qi and Wang, Yuqi and Lin, Wenfeng and Guo, Mingyu and Luo, Chong and Zheng, Nanning},
    title     = {Semantics Lead the Way: Harmonizing Semantic and Texture Modeling with Asynchronous Latent Diffusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {43664--43674}
}
Geometry-Aligned and Anomaly-Aware Reconstruction for 3D Anomaly Detection: Linchun Wu,

Qin Zou,

Yuanhao Yue,

Zhongyuan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Linchun and Zou, Qin and Yue, Yuanhao and Wang, Zhongyuan},
    title     = {Geometry-Aligned and Anomaly-Aware Reconstruction for {3D} Anomaly Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14648--14657}
}
MPL: Match-guided Prototype Learning for Few-shot Action Recognition: Feng Yang,

Jie Zhao,

Fulin Luo,

Anyong Qin,

Tiecheng Song,

Yue Zhao,

Chenqiang Gao,

Junwei Han; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Feng and Zhao, Jie and Luo, Fulin and Qin, Anyong and Song, Tiecheng and Zhao, Yue and Gao, Chenqiang and Han, Junwei},
  title     = {{MPL}: Match-guided Prototype Learning for Few-shot Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34439--34448},
}
CogDriver: Integrating Cognitive Inertia for Temporally Coherent Planning in Autonomous Driving: Pei Liu,

Qingtian Ning,

Xinyan Lu,

Haipeng Liu,

Weiliang Ma,

Dangen She,

Xianpeng Lang,

Jun Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Pei and Ning, Qingtian and Lu, Xinyan and Liu, Haipeng and Ma, Weiliang and She, Dangen and Lang, Xianpeng and Ma, Jun},
  title     = {{CogDriver}: Integrating Cognitive Inertia for Temporally Coherent Planning in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18150--18160},
}
LUMINA: A Multi-Vendor Mammography Benchmark with Energy Harmonization Protocol: Hongyi Pan,

Gorkem Durak,

Halil Ertugrul Aktas,

Andrea M. Bejar,

Baver Tutun,

Emre Uysal,

Ezgi Bulbul,

Mehmet Fatih Dogan,

Berrin Erok,

Berna Akkus Yildirim,

Sukru Mehmet Erturk,

Ulas Bagci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Hongyi and Durak, Gorkem and Aktas, Halil Ertugrul and Bejar, Andrea M. and Tutun, Baver and Uysal, Emre and Bulbul, Ezgi and Dogan, Mehmet Fatih and Erok, Berrin and Yildirim, Berna Akkus and Erturk, Sukru Mehmet and Bagci, Ulas},
  title     = {{LUMINA}: A Multi-Vendor Mammography Benchmark with Energy Harmonization Protocol},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35301--35310},
}
VerseCrafter: Dynamic Realistic Video World Model with 4D Geometric Control: Sixiao Zheng,

Minghao Yin,

Wenbo Hu,

Xiaoyu Li,

Ying Shan,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Sixiao and Yin, Minghao and Hu, Wenbo and Li, Xiaoyu and Shan, Ying and Fu, Yanwei},
  title     = {{VerseCrafter}: Dynamic Realistic Video World Model with {4D} Geometric Control},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40277--40290},
}
PhysSkin: Real-Time and Generalizable Physics-Based Animation via Self-Supervised Neural Skinning: Yuanhang Lei,

Tao Cheng,

Xingxuan Li,

Boming Zhao,

Siyuan Huang,

Ruizhen Hu,

Peter Yichen Chen,

Hujun Bao,

Zhaopeng Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2026_CVPR,
  author    = {Lei, Yuanhang and Cheng, Tao and Li, Xingxuan and Zhao, Boming and Huang, Siyuan and Hu, Ruizhen and Chen, Peter Yichen and Bao, Hujun and Cui, Zhaopeng},
  title     = {{PhysSkin}: Real-Time and Generalizable Physics-Based Animation via Self-Supervised Neural Skinning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32357--32366},
}
Learning Where to Look and How to Judge: Resolution-agnostic Image Quality Assessment with Quality-aware Saliency: Hakan Emre Gedik,

Shashank Gupta,

Alan Bovik; [pdf] [supp]
[bibtex]
@InProceedings{Gedik_2026_CVPR,
  author    = {Gedik, Hakan Emre and Gupta, Shashank and Bovik, Alan},
  title     = {Learning Where to Look and How to Judge: Resolution-agnostic Image Quality Assessment with Quality-aware Saliency},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37507--37517},
}
VGGDrive: Empowering Vision-Language Models with Cross-View Geometric Grounding for Autonomous Driving: Jie Wang,

Guang Li,

Zhijian Huang,

Chenxu Dang,

Hangjun Ye,

Yahong Han,

Long Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jie and Li, Guang and Huang, Zhijian and Dang, Chenxu and Ye, Hangjun and Han, Yahong and Chen, Long},
  title     = {{VGGDrive}: Empowering Vision-Language Models with Cross-View Geometric Grounding for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10954--10964},
}
LRHDR: Learning Representation-enhanced HDR Video Reconstruction: Chenzhuo Liao,

Xin Chen,

Bingchen Li,

Yu Meng,

Tao Yue,

Xuemei Hu; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Chenzhuo and Chen, Xin and Li, Bingchen and Meng, Yu and Yue, Tao and Hu, Xuemei},
  title     = {{LRHDR}: Learning Representation-enhanced {HDR} Video Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41584--41593},
}
FBTA: Enabling Single-GPU End-to-End Gigapixel WSI Classification with Feature Bridging and Translation Alignment: Jiuyang Dong,

Jiahan Li,

Junjun Jiang,

Yongbing Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Jiuyang and Li, Jiahan and Jiang, Junjun and Zhang, Yongbing},
  title     = {{FBTA}: Enabling Single-{GPU} End-to-End Gigapixel {WSI} Classification with Feature Bridging and Translation Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7026--7035},
}
Prompt-Free Unknown Label Generation for Open World Detection in Remote Sensing: Abdullah Azeem,

Ruisheng Wang,

Qingquan Li,

Abubakar Siddique; [pdf] [supp]
[bibtex]
@InProceedings{Azeem_2026_CVPR,
  author    = {Azeem, Abdullah and Wang, Ruisheng and Li, Qingquan and Siddique, Abubakar},
  title     = {Prompt-Free Unknown Label Generation for Open World Detection in Remote Sensing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34662--34672},
}
Beyond the Golden Data: Resolving the Motion-Vision Quality Dilemma via Timestep Selective Training: Xiangyang Luo,

Qingyu Li,

Yuming Li,

Guanbo Huang,

Yongjie Zhu,

Wenyu Qin,

Meng Wang,

Pengfei Wan,

Shao-Lun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Xiangyang and Li, Qingyu and Li, Yuming and Huang, Guanbo and Zhu, Yongjie and Qin, Wenyu and Wang, Meng and Wan, Pengfei and Huang, Shao-Lun},
  title     = {Beyond the Golden Data: Resolving the Motion-Vision Quality Dilemma via Timestep Selective Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43440--43449},
}
Agile Deliberation: Concept Deliberation for Subjective Visual Classification: Leijie Wang,

Otilia Stretcu,

Wei Qiao,

Thomas Denby,

Krishnamurthy Viswanathan,

Enming Luo,

Chun-Ta Lu,

Tushar Dogra,

Ranjay Krishna,

Ariel Fuxman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Leijie and Stretcu, Otilia and Qiao, Wei and Denby, Thomas and Viswanathan, Krishnamurthy and Luo, Enming and Lu, Chun-Ta and Dogra, Tushar and Krishna, Ranjay and Fuxman, Ariel},
  title     = {Agile Deliberation: Concept Deliberation for Subjective Visual Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4794--4804},
}
WISER: Wider Search, Deeper Thinking, and Adaptive Fusion for Training-Free Zero-Shot Composed Image Retrieval: Tianyue Wang,

Leigang Qu,

Tianyu Yang,

Xiangzhao Hao,

Yifan Xu,

Haiyun Guo,

Jinqiao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Tianyue and Qu, Leigang and Yang, Tianyu and Hao, Xiangzhao and Xu, Yifan and Guo, Haiyun and Wang, Jinqiao},
  title     = {{WISER}: Wider Search, Deeper Thinking, and Adaptive Fusion for Training-Free Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16865--16875},
}
EMMA: Extracting Multiple physical parameters from Multimodal Data: Farhat Shaikh,

Ayan Banerjee,

Sandeep Gupta; [pdf] [supp]
[bibtex]
@InProceedings{Shaikh_2026_CVPR,
  author    = {Shaikh, Farhat and Banerjee, Ayan and Gupta, Sandeep},
  title     = {{EMMA}: Extracting Multiple physical parameters from Multimodal Data},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1716--1725},
}
TraceGen: World Modeling in 3D Trace Space Enables Learning from Cross-Embodiment Videos: Seungjae Lee,

Yoonkyo Jung,

Inkook Chun,

Yao-Chih Lee,

Zikui Cai,

Hongjia Huang,

Aayush Talreja,

Tan Dao,

Yongyuan Liang,

Jia-Bin Huang,

Furong Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Seungjae and Jung, Yoonkyo and Chun, Inkook and Lee, Yao-Chih and Cai, Zikui and Huang, Hongjia and Talreja, Aayush and Dao, Tan and Liang, Yongyuan and Huang, Jia-Bin and Huang, Furong},
  title     = {{TraceGen}: World Modeling in {3D} Trace Space Enables Learning from Cross-Embodiment Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20721--20731},
}
MS-Temba: Multi-Scale Temporal Mamba for Understanding Long Untrimmed Videos: Arkaprava Sinha,

Monish Soundar Raj,

Pu Wang,

Ahmed Helmy,

Hieu Le,

Srijan Das; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sinha_2026_CVPR,
  author    = {Sinha, Arkaprava and Raj, Monish Soundar and Wang, Pu and Helmy, Ahmed and Le, Hieu and Das, Srijan},
  title     = {{MS-Temba}: Multi-Scale Temporal {Mamba} for Understanding Long Untrimmed Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9815--9826},
}
HAVE-Bench: Hierarchical Audio-Visual Evaluation from Perception to Interaction: Muyan Zhong,

Erfei Cui,

Sen Xing,

Weiyun Wang,

Wen Wu,

Yuchen Hu,

Yanting Zhang,

Xiaowei Hu,

Wenhai Wang,

Chao Zhang,

Jifeng Dai; [pdf] [supp]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Muyan and Cui, Erfei and Xing, Sen and Wang, Weiyun and Wu, Wen and Hu, Yuchen and Zhang, Yanting and Hu, Xiaowei and Wang, Wenhai and Zhang, Chao and Dai, Jifeng},
  title     = {{HAVE-Bench}: Hierarchical Audio-Visual Evaluation from Perception to Interaction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8801--8812},
}
Diffusion Probe: Generated Image Result Prediction Using CNN Probes: Bukun Huang,

Benlei Cui,

Zhizeng Ye,

Xuemei Dong,

Tuo Chen,

Hui Xue,

Dingkang Yang,

Longtao Huang,

Haiwen Hong,

Jingqun Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Bukun and Cui, Benlei and Ye, Zhizeng and Dong, Xuemei and Chen, Tuo and Xue, Hui and Yang, Dingkang and Huang, Longtao and Hong, Haiwen and Tang, Jingqun},
  title     = {Diffusion Probe: Generated Image Result Prediction Using {CNN} Probes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35926--35935},
}
Frequency Switching Mechanism for Parameter-Efficient Multi-Task Learning: Shih-Wen Liu,

Yen-Chang Chen,

Wei-Ta Chu,

Fu-En Yang,

Yu-Chiang Frank Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Shih-Wen and Chen, Yen-Chang and Chu, Wei-Ta and Yang, Fu-En and Wang, Yu-Chiang Frank},
  title     = {Frequency Switching Mechanism for Parameter-Efficient Multi-Task Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20273--20282},
}
FluidGaussian: Propagating Simulation-Based Uncertainty Toward Functionally-Intelligent 3D Reconstruction: Yuqiu Liu,

Jialin Song,

Marissa Ramirez de Chanlatte,

Rochishnu Chowdhury,

Rushil Paresh Desai,

Wuyang Chen,

Daniel Martin,

Michael W. Mahoney; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuqiu and Song, Jialin and de Chanlatte, Marissa Ramirez and Chowdhury, Rochishnu and Desai, Rushil Paresh and Chen, Wuyang and Martin, Daniel and Mahoney, Michael W.},
  title     = {{FluidGaussian}: Propagating Simulation-Based Uncertainty Toward Functionally-Intelligent {3D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15421--15431},
}
UAST: Unified Active Search and Tracking for Arbitrary Targets with UAVs: Liang Qin,

Min Wang,

Xingyu Lu,

Aowen Qiu,

Wengang Zhou,

Houqiang Li; [pdf]
[bibtex]
@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Liang and Wang, Min and Lu, Xingyu and Qiu, Aowen and Zhou, Wengang and Li, Houqiang},
  title     = {{UAST}: Unified Active Search and Tracking for Arbitrary Targets with {UAVs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13464--13473},
}
Cross-modal Representation Learning for Diffusion-generated Image Detection: Tao Gong,

Dayong Wang,

Qi Chu,

Bin Liu,

Nenghai Yu; [pdf]
[bibtex]
@InProceedings{Gong_2026_CVPR,
  author    = {Gong, Tao and Wang, Dayong and Chu, Qi and Liu, Bin and Yu, Nenghai},
  title     = {Cross-modal Representation Learning for Diffusion-generated Image Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36092--36102},
}
Describe Anything Anywhere At Any Moment: Nicolas Gorlo,

Lukas Schmid,

Luca Carlone; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gorlo_2026_CVPR,
  author    = {Gorlo, Nicolas and Schmid, Lukas and Carlone, Luca},
  title     = {Describe Anything Anywhere At Any Moment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35002--35013},
}
TerraSeg: Self-Supervised Ground Segmentation for Any LiDAR: Ted Lentsch,

Santiago Montiel-Marín,

Holger Caesar,

Dariu M. Gavrila; [pdf] [supp]
[bibtex]
@InProceedings{Lentsch_2026_CVPR,
  author    = {Lentsch, Ted and Montiel-Mar{\'\i}n, Santiago and Caesar, Holger and Gavrila, Dariu M.},
  title     = {{TerraSeg}: Self-Supervised Ground Segmentation for Any {LiDAR}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10040--10050},
}
META: Meta Evolution of Tool Trajectory Adaptation for Long-Video Understanding: Jing Huang,

Luyuan Chen,

Zhijie Xu,

Yadong Li,

Xingzhong Xu,

Siye Chen,

Jie Liu,

Ming Kong,

Qiang Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Jing and Chen, Luyuan and Xu, Zhijie and Li, Yadong and Xu, Xingzhong and Chen, Siye and Liu, Jie and Kong, Ming and Zhu, Qiang},
  title     = {{META}: Meta Evolution of Tool Trajectory Adaptation for Long-Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9837--9846},
}
Wanderland: Geometrically Grounded Simulation for Open-World Embodied AI: Xinhao Liu,

Jiaqi Li,

Youming Deng,

Ruxin Chen,

Yingjia Zhang,

Yifei Ma,

Li Guo,

Yiming Li,

Jing Zhang,

Chen Feng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xinhao and Li, Jiaqi and Deng, Youming and Chen, Ruxin and Zhang, Yingjia and Ma, Yifei and Guo, Li and Li, Yiming and Zhang, Jing and Feng, Chen},
  title     = {Wanderland: Geometrically Grounded Simulation for Open-World Embodied {AI}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1041--1052},
}
Mining Attribute Subspaces for Efficient Fine-tuning of 3D Foundation Models: Yu Jiang,

Hanwen Jiang,

Ahmed Abdelkader,

Wen-Sheng Chu,

Brandon Feng,

Zhangyang Wang,

Qixing Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Yu and Jiang, Hanwen and Abdelkader, Ahmed and Chu, Wen-Sheng and Feng, Brandon and Wang, Zhangyang and Huang, Qixing},
  title     = {Mining Attribute Subspaces for Efficient Fine-tuning of {3D} Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29071--29080},
}
Rethinking Box Supervision: Bias-Free Weakly Supervised Medical Segmentation: Jun Wei,

Hui Huang; [pdf]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Jun and Huang, Hui},
  title     = {Rethinking Box Supervision: Bias-Free Weakly Supervised Medical Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8525--8534},
}
CVA: Context-aware Video-text Alignment for Video Temporal Grounding: Sungho Moon,

Seunghun Lee,

Jiwan Seo,

Sunghoon Im; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moon_2026_CVPR,
  author    = {Moon, Sungho and Lee, Seunghun and Seo, Jiwan and Im, Sunghoon},
  title     = {{CVA}: Context-aware Video-text Alignment for Video Temporal Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17578--17587},
}
PAS: A Training-Free Stabilizer for Temporal Encoding in Video LLMs: Bowen Sun,

Yujun Cai,

Ming-Hsuan Yang,

Hang Wu,

Yiwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Bowen and Cai, Yujun and Yang, Ming-Hsuan and Wu, Hang and Wang, Yiwei},
  title     = {{PAS}: A Training-Free Stabilizer for Temporal Encoding in Video {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14471--14480},
}
Visual Document Understanding and Reasoning: A Multi-Agent Collaboration Framework with Agent-Wise Adaptive Test-Time Scaling: Xinlei Yu,

Chengming Xu,

Zhangquan Chen,

Yudong Zhang,

Shilin Lu,

Cheng Yang,

Jiangning Zhang,

Shuicheng Yan,

Xiaobin Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Xinlei and Xu, Chengming and Chen, Zhangquan and Zhang, Yudong and Lu, Shilin and Yang, Cheng and Zhang, Jiangning and Yan, Shuicheng and Hu, Xiaobin},
  title     = {Visual Document Understanding and Reasoning: A Multi-Agent Collaboration Framework with Agent-Wise Adaptive Test-Time Scaling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12300--12311},
}
Beyond Static Frames: Temporal Aggregate-and-Restore Vision Transformer for Human Pose Estimation: Hongwei Fang,

Jiahang Cai,

Xun Wang,

Wenwu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Hongwei and Cai, Jiahang and Wang, Xun and Yang, Wenwu},
  title     = {Beyond Static Frames: Temporal Aggregate-and-Restore Vision Transformer for Human Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42891--42900},
}
Electromagnetic Inverse Scattering from a Single Transmitter: Yizhe Cheng,

Chunxun Tian,

Haoru Wang,

Wentao Zhu,

Xiaoxuan Ma,

Yizhou Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Yizhe and Tian, Chunxun and Wang, Haoru and Zhu, Wentao and Ma, Xiaoxuan and Wang, Yizhou},
  title     = {Electromagnetic Inverse Scattering from a Single Transmitter},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34040--34049},
}
Bridging Human Evaluation to Infrared and Visible Image Fusion: Jinyuan Liu,

Xingyuan Li,

Qingyun Mei,

Haoyuan Xu,

Zhiying Jiang,

Long Ma,

Risheng Liu,

Xin Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jinyuan and Li, Xingyuan and Mei, Qingyun and Xu, Haoyuan and Jiang, Zhiying and Ma, Long and Liu, Risheng and Fan, Xin},
  title     = {Bridging Human Evaluation to Infrared and Visible Image Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12322--12333},
}
CanonCGT: Reference-Based Color Grading via Canonical Pivot Representation: Jinwon Ko,

Keunsoo Ko,

Chang-Su Kim; [pdf] [supp]
[bibtex]
@InProceedings{Ko_2026_CVPR,
  author    = {Ko, Jinwon and Ko, Keunsoo and Kim, Chang-Su},
  title     = {{CanonCGT}: Reference-Based Color Grading via Canonical Pivot Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15486--15495},
}
Every Error has Its Magnitude: Asymmetric Mistake Severity Training for Multiclass Multiple Instance Learning: Sungrae Hong,

Jiwon Jeong,

Jisu Shin,

Donghee Han,

Sol Lee,

Kyungeun Kim,

Mun Yong Yi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Sungrae and Jeong, Jiwon and Shin, Jisu and Han, Donghee and Lee, Sol and Kim, Kyungeun and Yi, Mun Yong},
  title     = {Every Error has Its Magnitude: Asymmetric Mistake Severity Training for Multiclass Multiple Instance Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28244--28253},
}
Thinking in 360°: Humanoid Visual Search in the Wild: Heyang Yu,

Yinan Han,

Xiangyu Zhang,

Baiqiao Yin,

Bowen Chang,

Xiangyu Han,

Xinhao Liu,

Jing Zhang,

Marco Pavone,

Chen Feng,

Saining Xie,

Yiming Li; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Heyang and Han, Yinan and Zhang, Xiangyu and Yin, Baiqiao and Chang, Bowen and Han, Xiangyu and Liu, Xinhao and Zhang, Jing and Pavone, Marco and Feng, Chen and Xie, Saining and Li, Yiming},
  title     = {Thinking in {360$^\circ$}: Humanoid Visual Search in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22445--22455},
}
Towards Unified Human Perception and Machine Understanding: Token Flow Guided Compression Framework: Li Xu,

Yingfu Zhang,

Kepeng Xu,

Gang He,

Yunsong Li; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Li and Zhang, Yingfu and Xu, Kepeng and He, Gang and Li, Yunsong},
  title     = {Towards Unified Human Perception and Machine Understanding: Token Flow Guided Compression Framework},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17630--17640},
}
Physically-Grounded Turbulence Mitigation with Frame-Shared Degradation Parameters: Dongxin Xie,

Yan Huang,

Yong Xu,

Hui Ji; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Dongxin and Huang, Yan and Xu, Yong and Ji, Hui},
  title     = {Physically-Grounded Turbulence Mitigation with Frame-Shared Degradation Parameters},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29919--29928},
}
Exploring the Underwater World Segmentation without Extra Training: Bingyu Li,

Tao Huo,

Da Zhang,

Zhiyuan Zhao,

Junyu Gao,

Xuelong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Bingyu and Huo, Tao and Zhang, Da and Zhao, Zhiyuan and Gao, Junyu and Li, Xuelong},
  title     = {Exploring the Underwater World Segmentation without Extra Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39879--39889},
}
Point4Cast: Streaming Dynamic Scene Reconstruction and Forecasting: Xinhang Liu,

Pedro Miraldo,

Suhas Lohit,

Huaizu Jiang,

Naoko Sawada,

Yu-Wing Tai,

Chi-Keung Tang,

Moitreya Chatterjee; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xinhang and Miraldo, Pedro and Lohit, Suhas and Jiang, Huaizu and Sawada, Naoko and Tai, Yu-Wing and Tang, Chi-Keung and Chatterjee, Moitreya},
  title     = {{Point4Cast}: Streaming Dynamic Scene Reconstruction and Forecasting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14600--14611},
}
Speeding Up the Learning of 3D Gaussians with Much Shorter Gaussian Lists: Jiaqi Liu,

Zhizhong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jiaqi and Han, Zhizhong},
  title     = {Speeding Up the Learning of {3D} {Gaussians} with Much Shorter {Gaussian} Lists},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1231--1240},
}
HOLO: Homography-Guided Pose Estimator Network for Fine-Grained Visual Localization on SD Maps: Xuchang Zhong,

Xu Cao,

Jinke Feng,

Hao Fang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Xuchang and Cao, Xu and Feng, Jinke and Fang, Hao},
  title     = {{HOLO}: Homography-Guided Pose Estimator Network for Fine-Grained Visual Localization on {SD} Maps},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41376--41385},
}
Guiding a Diffusion Transformer with the Internal Dynamics of Itself: Xingyu Zhou,

Qifan Li,

Xiaobin Hu,

Hai Chen,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Xingyu and Li, Qifan and Hu, Xiaobin and Chen, Hai and Gu, Shuhang},
  title     = {Guiding a Diffusion Transformer with the Internal Dynamics of Itself},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11536--11545},
}
OVOD-Agent: A Markov-Bandit Framework for Proactive Visual Reasoning and Self-Evolving Detection: Chujie Wang,

Jianyu Lu,

Zhiyuan Luo,

Xi Chen,

Chu He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chujie and Lu, Jianyu and Luo, Zhiyuan and Chen, Xi and He, Chu},
  title     = {{OVOD-Agent}: A {Markov}-Bandit Framework for Proactive Visual Reasoning and Self-Evolving Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41416--41425},
}
Generalizable Video Quality Assessment via Weak-to-Strong Learning: Linhan Cao,

Wei Sun,

Xiangyang Zhu,

Kaiwei Zhang,

Jun Jia,

Yicong Peng,

Dandan Zhu,

Guangtao Zhai,

Xiongkuo Min; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Linhan and Sun, Wei and Zhu, Xiangyang and Zhang, Kaiwei and Jia, Jun and Peng, Yicong and Zhu, Dandan and Zhai, Guangtao and Min, Xiongkuo},
  title     = {Generalizable Video Quality Assessment via Weak-to-Strong Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25578--25588},
}
CAST: Context-Aware Dynamic Latent Space Transformation for Interactive Text-to-Image Retrieval: Xuanzuo Lin,

Min Zhang,

Daizong Liu,

Zhiwen Zuo,

Xun Yang,

Changting Lin,

Xun Wang,

Jianfeng Dong; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Xuanzuo and Zhang, Min and Liu, Daizong and Zuo, Zhiwen and Yang, Xun and Lin, Changting and Wang, Xun and Dong, Jianfeng},
  title     = {{CAST}: Context-Aware Dynamic Latent Space Transformation for Interactive Text-to-Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38794--38803},
}
DiG: Differential Grounding for Enhancing Fine-Grained Perception in Multimodal Large Language Models: Zhou Tao,

Shida Wang,

YongXiang Hua,

Haoyu Cao,

Linli Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2026_CVPR,
  author    = {Tao, Zhou and Wang, Shida and Hua, YongXiang and Cao, Haoyu and Xu, Linli},
  title     = {{DiG}: Differential Grounding for Enhancing Fine-Grained Perception in Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1695--1705},
}
From Failure to Feedback: Group Revision Unlocks Hard Cases in Object-Level Grounding: Yuyuan Liu,

Yiping Ji,

Anjie Le,

Jiayuan Zhu,

Jiazhen Pan,

Can Peng,

Jiajun Deng,

Fengbei Liu,

Junde Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuyuan and Ji, Yiping and Le, Anjie and Zhu, Jiayuan and Pan, Jiazhen and Peng, Can and Deng, Jiajun and Liu, Fengbei and Wu, Junde},
  title     = {From Failure to Feedback: Group Revision Unlocks Hard Cases in Object-Level Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4816--4828},
}
VisPlay: Self-Evolving Vision-Language Models: Yicheng He,

Chengsong Huang,

Zongxia Li,

Jiaxin Huang,

Yonghui Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yicheng and Huang, Chengsong and Li, Zongxia and Huang, Jiaxin and Yang, Yonghui},
  title     = {{VisPlay}: Self-Evolving Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26274--26284},
}
DeepScan: A Training-Free Framework for Visually Grounded Reasoning in Large Vision-Language Models: Yangfu Li,

Hongjian Zhan,

Jiawei Chen,

Yuning Gong,

Qi Liu,

Yue Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yangfu and Zhan, Hongjian and Chen, Jiawei and Gong, Yuning and Liu, Qi and Lu, Yue},
  title     = {{DeepScan}: A Training-Free Framework for Visually Grounded Reasoning in Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19155--19164},
}
APEX: A Decoupled Memory-based Explorer for Asynchronous Aerial Object Goal Navigation: Daoxuan Zhang,

Ping Chen,

Xiaobo Xia,

Xiu Su,

Ruichen Zhen,

Jianqiang Xiao,

Shuo Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Daoxuan and Chen, Ping and Xia, Xiaobo and Su, Xiu and Zhen, Ruichen and Xiao, Jianqiang and Yang, Shuo},
  title     = {{APEX}: A Decoupled Memory-based Explorer for Asynchronous Aerial Object Goal Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15232--15242},
}
EE-RL: Vision Language Guided Reinforcement Learning with Explorer and Expert model for End-to-End Autonomous Driving: Xiaolong Li,

Lan Yang,

Ruyang Li,

Shan Fang,

Yang Liu,

Xiangmo Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xiaolong and Yang, Lan and Li, Ruyang and Fang, Shan and Liu, Yang and Zhao, Xiangmo},
  title     = {{EE-RL}: Vision Language Guided Reinforcement Learning with Explorer and Expert model for End-to-End Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32082--32092},
}
Consistency Beyond Contrast: Enhancing Open-Vocabulary Object Detection Robustness via Contextual Consistency Learning: Bozhao Li,

Shaocong Wu,

Tong Shao,

Senqiao Yang,

Qiben Shan,

Zhuotao Tian,

Jingyong Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Bozhao and Wu, Shaocong and Shao, Tong and Yang, Senqiao and Shan, Qiben and Tian, Zhuotao and Su, Jingyong},
    title     = {Consistency Beyond Contrast: Enhancing Open-Vocabulary Object Detection Robustness via Contextual Consistency Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {34617-34627}
}
NeuROK: Generative 4D Neural Object Kinematics: Chen Geng,

Guangzhao He,

Yue Gao,

Yunzhi Zhang,

Shangzhe Wu,

Jiajun Wu; [pdf]
[bibtex]
@InProceedings{Geng_2026_CVPR,
    author    = {Geng, Chen and He, Guangzhao and Gao, Yue and Zhang, Yunzhi and Wu, Shangzhe and Wu, Jiajun},
    title     = {NeuROK: Generative 4D Neural Object Kinematics},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {39239-39251}
}
Rationale-Enhanced Decoding for Multi-modal Chain-of-Thought: Shin'ya Yamaguchi,

Kosuke Nishida,

Daiki Chijiwa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yamaguchi_2026_CVPR,
    author    = {Yamaguchi, Shin'ya and Nishida, Kosuke and Chijiwa, Daiki},
    title     = {Rationale-Enhanced Decoding for Multi-modal Chain-of-Thought},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {19241-19252}
}
D$^2$-FOSA: Dual-Diffusion Guided EEG-to-Image Reconstruction with Frequency-Oriented Semantic Alignment: Chenglong Yu,

Shuai Shen,

Xiangsheng Li,

Yang Li; [pdf] [supp]
[bibtex]
% The method name in the title is typeset with inline math (D, superscript 2, then -FOSA);
% it is braced as one unit so bibliography styles neither recase nor split it.
@InProceedings{Yu_2026_CVPR,
    author    = {Yu, Chenglong and Shen, Shuai and Li, Xiangsheng and Li, Yang},
    title     = {{D$^2$-FOSA}: Dual-Diffusion Guided EEG-to-Image Reconstruction with Frequency-Oriented Semantic Alignment},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {26698-26710}
}
Spherical Voronoi: Directional Appearance as a Differentiable Partition of the Sphere: Francesco Di Sario,

Daniel Rebain,

Dor Verbin,

Marco Grangetto,

Andrea Tagliasacchi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Di_Sario_2026_CVPR,
    author    = {Di Sario, Francesco and Rebain, Daniel and Verbin, Dor and Grangetto, Marco and Tagliasacchi, Andrea},
    title     = {Spherical Voronoi: Directional Appearance as a Differentiable Partition of the Sphere},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {22529-22538}
}
Understanding Counting Mechanisms in Large Language and Vision-Language Models: Hosein Hasani,

Amirmohammad Izadi,

Fatemeh Askari,

Mobin Bagherian,

Sadegh Mohammadian,

Mohammad Izadi,

Mahdieh Soleymani Baghshah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hasani_2026_CVPR,
    author    = {Hasani, Hosein and Izadi, Amirmohammad and Askari, Fatemeh and Bagherian, Mobin and Mohammadian, Sadegh and Izadi, Mohammad and Baghshah, Mahdieh Soleymani},
    title     = {Understanding Counting Mechanisms in Large Language and Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {5125-5133}
}
Test-time Ego-Exo-centric Adaptation for Action Anticipation via Multi-Label Prototype Growing and Dual-Clue Consistency: Zhaofeng Shi,

Heqian Qiu,

Lanxiao Wang,

Qingbo Wu,

Fanman Meng,

Lili Pan,

Hongliang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
    author    = {Shi, Zhaofeng and Qiu, Heqian and Wang, Lanxiao and Wu, Qingbo and Meng, Fanman and Pan, Lili and Li, Hongliang},
    title     = {Test-time Ego-Exo-centric Adaptation for Action Anticipation via Multi-Label Prototype Growing and Dual-Clue Consistency},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {16988-16999}
}
DreamStereo: Towards Real-Time Stereo Inpainting for HD Videos: Yuan Huang,

Sijie Zhao,

Jing Cheng,

Hao Xu,

Shaohui Jiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Yuan and Zhao, Sijie and Cheng, Jing and Xu, Hao and Jiao, Shaohui},
    title     = {DreamStereo: Towards Real-Time Stereo Inpainting for HD Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {25393-25402}
}
MMDIR: Multimodal Instruction-Driven Framework for Mixed-Degradation Document Image Restoration: Heng Li,

Xingyuan Wang,

Yang Fan,

Yunan Zhang,

Xiangping Wu,

Qingcai Chen; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Heng and Wang, Xingyuan and Fan, Yang and Zhang, Yunan and Wu, Xiangping and Chen, Qingcai},
    title     = {MMDIR: Multimodal Instruction-Driven Framework for Mixed-Degradation Document Image Restoration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {8387-8396}
}
ID-Sim: An Identity-Focused Similarity Metric: Julia Chae,

Nicholas Kolkin,

Jui-Hsien Wang,

Richard Zhang,

Sara Beery,

Cusuh Ham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chae_2026_CVPR,
    author    = {Chae, Julia and Kolkin, Nicholas and Wang, Jui-Hsien and Zhang, Richard and Beery, Sara and Ham, Cusuh},
    title     = {ID-Sim: An Identity-Focused Similarity Metric},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {11250-11262}
}
Free-Lunch Long Video Generation via Layer-Adaptive O.O.D Correction: Jiahao Tian,

Chenxi Song,

Wei Cheng,

Chi Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2026_CVPR,
    author    = {Tian, Jiahao and Song, Chenxi and Cheng, Wei and Zhang, Chi},
    title     = {Free-Lunch Long Video Generation via Layer-Adaptive O.O.D Correction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {1973-1982}
}
Streaming Video Instruction Tuning: Jiaer Xia,

Peixian Chen,

Mengdan Zhang,

Xing Sun,

Kaiyang Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
    author    = {Xia, Jiaer and Chen, Peixian and Zhang, Mengdan and Sun, Xing and Zhou, Kaiyang},
    title     = {Streaming Video Instruction Tuning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {31219-31229}
}
Boosting Reasoning in Large Multimodal Models via Activation Replay: Yun Xing,

Xiaobin Hu,

Qingdong He,

Jiangning Zhang,

Shuicheng Yan,

Shijian Lu,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2026_CVPR,
    author    = {Xing, Yun and Hu, Xiaobin and He, Qingdong and Zhang, Jiangning and Yan, Shuicheng and Lu, Shijian and Jiang, Yu-Gang},
    title     = {Boosting Reasoning in Large Multimodal Models via Activation Replay},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {19229-19240}
}
Black-box Membership Inference Attacks on the Pre-training Data of Image-generation Models: Tao Qi,

Huili Wang,

Yuanhong Huang,

Wendan Wang,

Lianchao Zhao,

Jinrui Wang,

Zichen Qin,

Shangguang Wang,

Yongfeng Huang; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2026_CVPR,
    author    = {Qi, Tao and Wang, Huili and Huang, Yuanhong and Wang, Wendan and Zhao, Lianchao and Wang, Jinrui and Qin, Zichen and Wang, Shangguang and Huang, Yongfeng},
    title     = {Black-box Membership Inference Attacks on the Pre-training Data of Image-generation Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {62-71}
}
Enhancing Part-Level Point Grounding for Any Open-Source MLLMs: Jin-Cheng Jhang,

Fu-En Wang,

Xin Yang,

Nan Qiao,

Lu Xia,

Min Sun,

Cheng-Hao Kuo; [pdf] [supp]
[bibtex]
@InProceedings{Jhang_2026_CVPR,
    author    = {Jhang, Jin-Cheng and Wang, Fu-En and Yang, Xin and Qiao, Nan and Xia, Lu and Sun, Min and Kuo, Cheng-Hao},
    title     = {Enhancing Part-Level Point Grounding for Any Open-Source MLLMs},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {22900-22909}
}
Detecting AI-Generated Forgeries via Iterative Manifold Deviation Amplification: Jiangling Zhang,

Shuxuan Gao,

Bofan Liu,

Siqiang Feng,

Jirui Huang,

Yaxiong Chen,

Ziyu Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Jiangling and Gao, Shuxuan and Liu, Bofan and Feng, Siqiang and Huang, Jirui and Chen, Yaxiong and Chen, Ziyu},
    title     = {Detecting AI-Generated Forgeries via Iterative Manifold Deviation Amplification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {35494-35503}
}
RoadSceneBench: A Lightweight Benchmark for Mid-Level Road Scene Understanding: Xiyan Liu,

Han Wang,

Yuhu Wang,

Junjie Cai,

Zhe Cao,

Jianzhong Yang,

Zhen Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Xiyan and Wang, Han and Wang, Yuhu and Cai, Junjie and Cao, Zhe and Yang, Jianzhong and Lu, Zhen},
    title     = {RoadSceneBench: A Lightweight Benchmark for Mid-Level Road Scene Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {23720-23729}
}
OneCAT: Decoder-Only Auto-Regressive Model for Unified Understanding and Generation: Han Li,

Xinyu Peng,

Yaoming Wang,

Zelin Peng,

Xin Chen,

Rongxiang Weng,

Jingang Wang,

Xunliang Cai,

Wenrui Dai,

Hongkai Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Han and Peng, Xinyu and Wang, Yaoming and Peng, Zelin and Chen, Xin and Weng, Rongxiang and Wang, Jingang and Cai, Xunliang and Dai, Wenrui and Xiong, Hongkai},
    title     = {OneCAT: Decoder-Only Auto-Regressive Model for Unified Understanding and Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {30235-30245}
}
A Closed-Form Solution for Debiasing Vision-Language Models with Utility Guarantees Across Modalities and Tasks: Tangzheng Lian,

Guanyu Hu,

Yijing Ren,

Dimitrios Kollias,

Oya Celiktutan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lian_2026_CVPR,
    author    = {Lian, Tangzheng and Hu, Guanyu and Ren, Yijing and Kollias, Dimitrios and Celiktutan, Oya},
    title     = {A Closed-Form Solution for Debiasing Vision-Language Models with Utility Guarantees Across Modalities and Tasks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {31672-31682}
}
Vista4D: Video Reshooting with 4D Point Clouds: Kuan Heng Lin,

Zhizheng Liu,

Pablo Salamanca,

Yash Kant,

Ryan Burgert,

Yuancheng Xu,

Koichi Namekata,

Yiwei Zhao,

Bolei Zhou,

Micah Goldblum,

Paul Debevec,

Ning Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
    author    = {Lin, Kuan Heng and Liu, Zhizheng and Salamanca, Pablo and Kant, Yash and Burgert, Ryan and Xu, Yuancheng and Namekata, Koichi and Zhao, Yiwei and Zhou, Bolei and Goldblum, Micah and Debevec, Paul and Yu, Ning},
    title     = {Vista4D: Video Reshooting with 4D Point Clouds},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {32671-32682}
}
LookasideVLN: Direction-Aware Aerial Vision-and-Language Navigation: Yuwei Ning,

Ganlong Zhao,

Yipeng Qin,

Si Liu,

Yang Liu,

Liang Lin,

Guanbin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ning_2026_CVPR,
    author    = {Ning, Yuwei and Zhao, Ganlong and Qin, Yipeng and Liu, Si and Liu, Yang and Lin, Liang and Li, Guanbin},
    title     = {LookasideVLN: Direction-Aware Aerial Vision-and-Language Navigation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {32441-32450}
}
Tracking through Severe Occlusion via Event-Derived Transient Cues: Hao Dong,

Yujin Liu,

Haoyue Liu,

Zhenyu Wang,

Shihan Peng,

Zhiwei Shi,

Yi Chang,

Luxin Yan; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2026_CVPR,
    author    = {Dong, Hao and Liu, Yujin and Liu, Haoyue and Wang, Zhenyu and Peng, Shihan and Shi, Zhiwei and Chang, Yi and Yan, Luxin},
    title     = {Tracking through Severe Occlusion via Event-Derived Transient Cues},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {29526-29536}
}
Inconsistency-aware Multimodal Schrodinger Bridge for Deepfake Localization: Jiayu Xiong,

Jing Wang,

Qi Zhang,

Wanlong Wang,

Jun Xue; [pdf]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
    author    = {Xiong, Jiayu and Wang, Jing and Zhang, Qi and Wang, Wanlong and Xue, Jun},
    title     = {Inconsistency-aware Multimodal Schrodinger Bridge for Deepfake Localization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {8697-8706}
}
From Inpainting to Layer Decomposition: Repurposing Generative Inpainting Models for Image Layer Decomposition: Jingxi Chen,

Yixiao Zhang,

Xiaoye Qian,

Zongxia Li,

Cornelia Fermuller,

Caren Chen,

Yiannis Aloimonos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Jingxi and Zhang, Yixiao and Qian, Xiaoye and Li, Zongxia and Fermuller, Cornelia and Chen, Caren and Aloimonos, Yiannis},
    title     = {From Inpainting to Layer Decomposition: Repurposing Generative Inpainting Models for Image Layer Decomposition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {16054-16063}
}
AeroDGS: Physically Consistent Dynamic Gaussian Splatting for Single-Sequence Aerial 4D Reconstruction: Hanyang Liu,

Rongjun Qin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Hanyang and Qin, Rongjun},
    title     = {AeroDGS: Physically Consistent Dynamic Gaussian Splatting for Single-Sequence Aerial 4D Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {19011-19021}
}
ViTPrompt: Training-Free Prompt Refinement with Visual Tokens for Open-Vocabulary Detection: Yitong Qin,

Lihua Zhou,

Jiwei Wei,

Ran Ran,

Shiyuan He,

Zeyu Ma,

Shuaifeng Li,

Nianxin Li,

Heng Tao Shen; [pdf]
[bibtex]
@InProceedings{Qin_2026_CVPR,
    author    = {Qin, Yitong and Zhou, Lihua and Wei, Jiwei and Ran, Ran and He, Shiyuan and Ma, Zeyu and Li, Shuaifeng and Li, Nianxin and Shen, Heng Tao},
    title     = {ViTPrompt: Training-Free Prompt Refinement with Visual Tokens for Open-Vocabulary Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {3111-3121}
}
FoleyDirector: Fine-Grained Temporal Steering for Video-to-Audio Generation via Structured Scripts: You Li,

Dewei Zhou,

Fan Ma,

Fu Li,

Dongliang He,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, You and Zhou, Dewei and Ma, Fan and Li, Fu and He, Dongliang and Yang, Yi},
    title     = {FoleyDirector: Fine-Grained Temporal Steering for Video-to-Audio Generation via Structured Scripts},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {29254-29264}
}
SMVRT: Implicit Human 3D Modeling Using Sparse Multi-View Volumetric Reconstruction with Transformer Fusion: Chuanmao Fan,

Chenxi Zhao,

Ye Duan; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2026_CVPR,
    author    = {Fan, Chuanmao and Zhao, Chenxi and Duan, Ye},
    title     = {SMVRT: Implicit Human 3D Modeling Using Sparse Multi-View Volumetric Reconstruction with Transformer Fusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {14556-14566}
}
HCL-FF: Hierarchical and Contrastive Learning for Forward-Forward Algorithm: Jie-En Yao,

Hong-En Chen,

C.-C. Jay Kuo; [pdf] [supp]
[bibtex]
@InProceedings{Yao_2026_CVPR,
    author    = {Yao, Jie-En and Chen, Hong-En and Kuo, C.-C. Jay},
    title     = {HCL-FF: Hierarchical and Contrastive Learning for Forward-Forward Algorithm},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {27429-27438}
}
HumanBA: Human-Aware Bundle Adjustment via Global Human-Camera Decoupling: Fengyuan Yang,

Tanuj Sur,

Tze Ho Elden Tse,

Angela Yao; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Fengyuan and Sur, Tanuj and Tse, Tze Ho Elden and Yao, Angela},
    title     = {HumanBA: Human-Aware Bundle Adjustment via Global Human-Camera Decoupling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {13846-13855}
}
Stability-Driven Motion Generation for Object-Guided Human-Human Co-Manipulation: Jiahao Xu,

Xiaohan Yuan,

Xingchen Wu,

Chongyang Xu,

Kun Li,

Buzhen Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Jiahao and Yuan, Xiaohan and Wu, Xingchen and Xu, Chongyang and Li, Kun and Huang, Buzhen},
    title     = {Stability-Driven Motion Generation for Object-Guided Human-Human Co-Manipulation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {38355-38365}
}
Beyond Heuristic Prompting: A Concept-Guided Bayesian Framework for Zero-Shot Image Recognition: Hui Liu,

Kecheng Chen,

Jialiang Wang,

Xianming Liu,

Wenya Wang,

Haoliang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Hui and Chen, Kecheng and Wang, Jialiang and Liu, Xianming and Wang, Wenya and Li, Haoliang},
    title     = {Beyond Heuristic Prompting: A Concept-Guided Bayesian Framework for Zero-Shot Image Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {5521-5531}
}
Diagnosing and Repairing Unsafe Channels in Vision-Language Models via Causal Discovery and Dual-Modal Safety Subspace Projection: Jinhu Fu,

Yihang Lou,

Qingyi Si,

Shudong Zhang,

Sen Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2026_CVPR,
    author    = {Fu, Jinhu and Lou, Yihang and Si, Qingyi and Zhang, Shudong and Su, Sen},
    title     = {Diagnosing and Repairing Unsafe Channels in Vision-Language Models via Causal Discovery and Dual-Modal Safety Subspace Projection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {31693-31702}
}
Beyond Endpoints: Path-Centric Reasoning for Vectorized Off-Road Network Extraction: Wenfei Guan,

Jilin Mei,

Tong Shen,

Xumin Wu,

Shuo Wang,

Chen Min,

Yu Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guan_2026_CVPR,
    author    = {Guan, Wenfei and Mei, Jilin and Shen, Tong and Wu, Xumin and Wang, Shuo and Min, Chen and Hu, Yu},
    title     = {Beyond Endpoints: Path-Centric Reasoning for Vectorized Off-Road Network Extraction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {13254-13263}
}
Resolving the Stability-Plasticity Dilemma in Reinforcement Learning via Complementary Continual Critics: Bo Sun,

Peixi Peng,

Guang Tan,

Haoran Xu,

Yaokun Li,

Yiqian Chang,

Shuaixian Wang,

Luntong Li; [pdf]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Bo and Peng, Peixi and Tan, Guang and Xu, Haoran and Li, Yaokun and Chang, Yiqian and Wang, Shuaixian and Li, Luntong},
    title     = {Resolving the Stability-Plasticity Dilemma in Reinforcement Learning via Complementary Continual Critics},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {22348-22357}
}
Scaling Self-Supervised and Cross-Modal Pretraining for Volumetric CT Transformers: Cris Claessens,

Christiaan Viviers,

Giacomo D'Amicantonio,

Egor Bondarev,

Fons van der Sommen; [pdf] [supp]
[bibtex]
@InProceedings{Claessens_2026_CVPR,
    author    = {Claessens, Cris and Viviers, Christiaan and D'Amicantonio, Giacomo and Bondarev, Egor and van der Sommen, Fons},
    title     = {Scaling Self-Supervised and Cross-Modal Pretraining for Volumetric CT Transformers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {13636-13647}
}
Hierarchically Robust Zero-shot Vision-language Models: Junhao Dong,

Yifei Zhang,

Hao Zhu,

Yew-Soon Ong,

Piotr Koniusz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2026_CVPR,
    author    = {Dong, Junhao and Zhang, Yifei and Zhu, Hao and Ong, Yew-Soon and Koniusz, Piotr},
    title     = {Hierarchically Robust Zero-shot Vision-language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {37642-37652}
}
PACT: Phase-Like Transition Constraints in Adapter-Based Continual Learning of Vision-Language Models: Xuan Wang,

Guiguang Ding,

Jungong Han; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Xuan and Ding, Guiguang and Han, Jungong},
    title     = {PACT: Phase-Like Transition Constraints in Adapter-Based Continual Learning of Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {17999-18009}
}
AnchorFlow: Training-Free 3D Editing via Latent Anchor-Aligned Flows: Zhenglin Zhou,

Fan Ma,

Chengzhuo Gui,

Xiaobo Xia,

Hehe Fan,

Yi Yang,

Tat-Seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Zhenglin and Ma, Fan and Gui, Chengzhuo and Xia, Xiaobo and Fan, Hehe and Yang, Yi and Chua, Tat-Seng},
    title     = {AnchorFlow: Training-Free 3D Editing via Latent Anchor-Aligned Flows},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {14387-14397}
}
Recurrent Video Masked Autoencoders: Daniel Zoran,

Nikhil Parthasarathy,

Yi Yang,

Drew A Hudson,

João Carreira,

Andrew Zisserman; [pdf] [supp] [arXiv]
[bibtex]
% Fourth author is listed as "Drew A Hudson": the surname is Hudson and the given names are Drew A.
% The tilde accent is written as a brace group so BibTeX treats the accented letter as a single character.
@InProceedings{Zoran_2026_CVPR,
    author    = {Zoran, Daniel and Parthasarathy, Nikhil and Yang, Yi and Hudson, Drew A and Carreira, Jo{\~a}o and Zisserman, Andrew},
    title     = {Recurrent Video Masked Autoencoders},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {17744-17755}
}
CLEP: Contrastive Language-Pose Pretraining: Sen Jia,

Huayu Wang,

Hsiang-Wei Huang,

Zhaochong An,

Jenq-Neng Hwang,

Huaping Zhang,

Lei Li; [pdf]
[bibtex]
@InProceedings{Jia_2026_CVPR,
    author    = {Jia, Sen and Wang, Huayu and Huang, Hsiang-Wei and An, Zhaochong and Hwang, Jenq-Neng and Zhang, Huaping and Li, Lei},
    title     = {CLEP: Contrastive Language-Pose Pretraining},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {30696-30706}
}
SDDF: Specificity-Driven Dynamic Focusing for Open-Vocabulary Camouflaged Object Detection: Jiaming Liang,

Yifeng Zhan,

Chunlin Liu,

Weihua Zheng,

Bingye Peng,

Qiwei Liang,

Boyang Cai,

Xiaochun Mai,

Qiang Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
    author    = {Liang, Jiaming and Zhan, Yifeng and Liu, Chunlin and Zheng, Weihua and Peng, Bingye and Liang, Qiwei and Cai, Boyang and Mai, Xiaochun and Nie, Qiang},
    title     = {SDDF: Specificity-Driven Dynamic Focusing for Open-Vocabulary Camouflaged Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {13049-13058}
}
GardenDesigner: Encoding Aesthetic Principles into Jiangnan Garden Construction via a Chain of Agents: Mengtian Li,

Fan Yang,

Ruixue Xiong,

Yiyan Fan,

Zhifeng Xie,

Zeyu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Mengtian and Yang, Fan and Xiong, Ruixue and Fan, Yiyan and Xie, Zhifeng and Wang, Zeyu},
    title     = {GardenDesigner: Encoding Aesthetic Principles into Jiangnan Garden Construction via a Chain of Agents},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {24417-24427}
}
Lens Component Deletion based on Differentiable Ray Tracing: Wenguan Zhang,

Qirun Zhang,

Tuo Sun,

Jiajian He,

Jiahui Xu,

Huajun Feng,

Qi Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Wenguan and Zhang, Qirun and Sun, Tuo and He, Jiajian and Xu, Jiahui and Feng, Huajun and Li, Qi},
    title     = {Lens Component Deletion based on Differentiable Ray Tracing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {5637-5646}
}
MR. Illuminate: Zero-Shot Low-Light Image Enhancement with Diffusion Prior: Joshua Cho,

Sara Aghajanzadeh,

Zhen Zhu,

David Forsyth; [pdf] [supp]
[bibtex]
@InProceedings{Cho_2026_CVPR,
    author    = {Cho, Joshua and Aghajanzadeh, Sara and Zhu, Zhen and Forsyth, David},
    title     = {MR. Illuminate: Zero-Shot Low-Light Image Enhancement with Diffusion Prior},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {8460-8470}
}
Beyond Caption-Based Queries in Video Moment Retrieval: David Pujol-Perich,

Albert Clapés,

Dima Damen,

Sergio Escalera,

Michael Wray; [pdf] [supp]
[bibtex]
% The acute accent in the second author's surname is written as a brace group
% so BibTeX treats the accented letter as a single character.
@InProceedings{Pujol-Perich_2026_CVPR,
    author    = {Pujol-Perich, David and Clap{\'e}s, Albert and Damen, Dima and Escalera, Sergio and Wray, Michael},
    title     = {Beyond Caption-Based Queries in Video Moment Retrieval},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {18545-18554}
}
AutoDebias: An Automated Framework for Detecting and Mitigating Backdoor Biases in Text-to-Image Models: Hongyi Cai,

Mohammad Mahdinur Rahman,

MingKang Dong,

Muxin Pu,

Moayad Aloqaily,

Jie Li,

Xinfeng Li,

Jialie Shen,

Meikang Qiu,

Qingsong Wen; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2026_CVPR,
    author    = {Cai, Hongyi and Rahman, Mohammad Mahdinur and Dong, MingKang and Pu, Muxin and Aloqaily, Moayad and Li, Jie and Li, Xinfeng and Shen, Jialie and Qiu, Meikang and Wen, Qingsong},
    title     = {AutoDebias: An Automated Framework for Detecting and Mitigating Backdoor Biases in Text-to-Image Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {29285-29294}
}
Vision-Oriented Lightweight Neural Architecture Search with Budget-Adaptive Evaluation: Yi Fan,

Yu-Bin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2026_CVPR,
    author    = {Fan, Yi and Yang, Yu-Bin},
    title     = {Vision-Oriented Lightweight Neural Architecture Search with Budget-Adaptive Evaluation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {41985-41995}
}
NVGS: Neural Visibility for Occlusion Culling in 3D Gaussian Splatting: Brent Zoomers,

Florian Hahlbohm,

Joni Vanherck,

Lode Jorissen,

Marcus Magnor,

Nick Michiels; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zoomers_2026_CVPR,
    author    = {Zoomers, Brent and Hahlbohm, Florian and Vanherck, Joni and Jorissen, Lode and Magnor, Marcus and Michiels, Nick},
    title     = {NVGS: Neural Visibility for Occlusion Culling in 3D Gaussian Splatting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {29824-29833}
}
CADC: Content Adaptive Diffusion-Based Generative Image Compression: Xihua Sheng,

Lingyu Zhu,

Tianyu Zhang,

Dong Liu,

Shiqi Wang,

Jing Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sheng_2026_CVPR,
    author    = {Sheng, Xihua and Zhu, Lingyu and Zhang, Tianyu and Liu, Dong and Wang, Shiqi and Wang, Jing},
    title     = {CADC: Content Adaptive Diffusion-Based Generative Image Compression},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {32936-32946}
}
GlyphPrinter: Region-Grouped Direct Preference Optimization for Glyph-Accurate Visual Text Rendering: Xincheng Shuai,

Ziye Li,

Henghui Ding,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shuai_2026_CVPR,
    author    = {Shuai, Xincheng and Li, Ziye and Ding, Henghui and Tao, Dacheng},
    title     = {GlyphPrinter: Region-Grouped Direct Preference Optimization for Glyph-Accurate Visual Text Rendering},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {7674-7683}
}
TEXTRIX: Latent Attribute Grid for Native Texture Generation and Beyond: Yifei Zeng,

Yajie Bao,

Jiachen Qian,

Shuang Wu,

Youtian Lin,

Hao Zhu,

Buyu Li,

Feihu Zhang,

Xun Cao,

Yao Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
    author    = {Zeng, Yifei and Bao, Yajie and Qian, Jiachen and Wu, Shuang and Lin, Youtian and Zhu, Hao and Li, Buyu and Zhang, Feihu and Cao, Xun and Yao, Yao},
    title     = {TEXTRIX: Latent Attribute Grid for Native Texture Generation and Beyond},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {27104-27113}
}
CHEEM: Continual Learning by Reuse, New, Adapt and Skip - A Hierarchical Exploration-Exploitation Approach: Chinmay Savadikar,

Michelle Dai,

Tianfu Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Savadikar_2026_CVPR,
    author    = {Savadikar, Chinmay and Dai, Michelle and Wu, Tianfu},
    title     = {CHEEM: Continual Learning by Reuse, New, Adapt and Skip - A Hierarchical Exploration-Exploitation Approach},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {25066-25076}
}
F2Net: A Frequency-Fused Network for Ultra-High Resolution Remote Sensing Segmentation: Hengzhi Chen,

Liqian Feng,

Wenhua Wu,

Xiaogang Zhu,

Qiuxia Wu,

Lianlei Shan,

Kun Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Hengzhi and Feng, Liqian and Wu, Wenhua and Zhu, Xiaogang and Wu, Qiuxia and Shan, Lianlei and Hu, Kun},
    title     = {F2Net: A Frequency-Fused Network for Ultra-High Resolution Remote Sensing Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {13275-13284}
}
Mitigating Objectness Bias and Region-to-Text Misalignment for Open-Vocabulary Panoptic Segmentation: Nikolay Kormushev,

Josip Šarić,

Matej Kristan; [pdf] [supp]
[bibtex]
@InProceedings{Kormushev_2026_CVPR,
    author    = {Kormushev, Nikolay and {\v{S}}ari{\'c}, Josip and Kristan, Matej},
    title     = {Mitigating Objectness Bias and Region-to-Text Misalignment for Open-Vocabulary Panoptic Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {17905--17915}
}
Relational Visual Similarity: Thao Nguyen,

Sicheng Mo,

Krishna Kumar Singh,

Yilin Wang,

Jing Shi,

Nicholas Kolkin,

Eli Shechtman,

Yong Jae Lee,

Yuheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
    author    = {Nguyen, Thao and Mo, Sicheng and Singh, Krishna Kumar and Wang, Yilin and Shi, Jing and Kolkin, Nicholas and Shechtman, Eli and Lee, Yong Jae and Li, Yuheng},
    title     = {Relational Visual Similarity},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {24141--24150}
}
Adversarial Style Optimization: Enhancing VLM Jailbreaks by GRPO-based Stylistic Triggers Optimization: Bingjun Luo,

Jialin Guo,

Yue Yao,

Xinpeng Ding; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2026_CVPR,
    author    = {Luo, Bingjun and Guo, Jialin and Yao, Yue and Ding, Xinpeng},
    title     = {Adversarial Style Optimization: Enhancing {VLM} Jailbreaks by {GRPO}-based Stylistic Triggers Optimization},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {11--19}
}
Evo-1: Lightweight Vision-Language-Action Model with Preserved Semantic Alignment: Tao Lin,

Yilei Zhong,

Yuxin Du,

Jingjing Zhang,

Jiting Liu,

Yinxinyu Chen,

Encheng Gu,

Ziyan Liu,

Hongyi Cai,

Yanwen Zou,

Lixing Zou,

Zhaoye Zhou,

Gen Li,

Bo Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
    author    = {Lin, Tao and Zhong, Yilei and Du, Yuxin and Zhang, Jingjing and Liu, Jiting and Chen, Yinxinyu and Gu, Encheng and Liu, Ziyan and Cai, Hongyi and Zou, Yanwen and Zou, Lixing and Zhou, Zhaoye and Li, Gen and Zhao, Bo},
    title     = {Evo-1: Lightweight Vision-Language-Action Model with Preserved Semantic Alignment},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {13397--13406}
}
Variational Graph-based Normal Integration: Lixiong Chen,

Bohan Yu,

Victor Adrian Prisacariu,

Imari Sato; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Lixiong and Yu, Bohan and Prisacariu, Victor Adrian and Sato, Imari},
    title     = {Variational Graph-based Normal Integration},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {12663--12672}
}
ReMoE: Region-Mixture Experts for Adversarially-Robust Vision Transformers: Qinghao Zhong,

Bingzhi Chen,

Yishu Liu,

Minhua Lu,

Guangming Lu; [pdf]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
    author    = {Zhong, Qinghao and Chen, Bingzhi and Liu, Yishu and Lu, Minhua and Lu, Guangming},
    title     = {{ReMoE}: Region-Mixture Experts for Adversarially-Robust Vision Transformers},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {37674--37683}
}
Hyperbolic Gramian Volumes for Multimodal Alignment: Saiyang Na,

Feng Jiang,

Qifeng Zhou,

Wenliang Zhong,

Thao M. Dang,

Yuzhi Guo,

Hehuan Ma,

Chunyuan Li,

Weizhi An,

Junzhou Huang; [pdf] [supp]
[bibtex]
@InProceedings{Na_2026_CVPR,
    author    = {Na, Saiyang and Jiang, Feng and Zhou, Qifeng and Zhong, Wenliang and Dang, Thao M. and Guo, Yuzhi and Ma, Hehuan and Li, Chunyuan and An, Weizhi and Huang, Junzhou},
    title     = {Hyperbolic {Gramian} Volumes for Multimodal Alignment},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {37756--37765}
}
ProSoftArena: Benchmarking Hierarchical Capabilities of Multi-modal Agents in Professional Software Environments: Jiaxin Ai,

Yukang Feng,

Fanrui Zhang,

Jianwen Sun,

Zizhen Li,

Chuanhao Li,

Yifan Chang,

Wenxiao Wu,

Ruoxi Wang,

Mingliang Zhai,

Kaipeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Ai_2026_CVPR,
    author    = {Ai, Jiaxin and Feng, Yukang and Zhang, Fanrui and Sun, Jianwen and Li, Zizhen and Li, Chuanhao and Chang, Yifan and Wu, Wenxiao and Wang, Ruoxi and Zhai, Mingliang and Zhang, Kaipeng},
    title     = {{ProSoftArena}: Benchmarking Hierarchical Capabilities of Multi-modal Agents in Professional Software Environments},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {34586--34595}
}
EReCu: Pseudo-label Evolution Fusion and Refinement with Multi-Cue Learning for Unsupervised Camouflage Detection: Shuo Jiang,

Gaojia Zhang,

Min Tan,

Yufei Yin,

Gang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
    author    = {Jiang, Shuo and Zhang, Gaojia and Tan, Min and Yin, Yufei and Pan, Gang},
    title     = {{EReCu}: Pseudo-label Evolution Fusion and Refinement with Multi-Cue Learning for Unsupervised Camouflage Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {25547--25556}
}
DynamicGTR: Leveraging Graph Topology Representation Preferences to Boost VLM Capabilities on Graph QAs: Yanbin Wei,

Jiangyue Yan,

Chun Kang,

Yang Chen,

Hua Liu,

James Kwok,

Yu Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
    author    = {Wei, Yanbin and Yan, Jiangyue and Kang, Chun and Chen, Yang and Liu, Hua and Kwok, James and Zhang, Yu},
    title     = {{DynamicGTR}: Leveraging Graph Topology Representation Preferences to Boost {VLM} Capabilities on Graph {QAs}},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {40822--40832}
}
BoostSLT: Boosting Sign Language Translation via a Plug-and-Play Diffusion-Based Semantic Enhancer: Changzhou Han,

Wanlun Ma,

Xi Tang,

Kun Hu,

Sheng Wen,

Yang Xiang; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
    author    = {Han, Changzhou and Ma, Wanlun and Tang, Xi and Hu, Kun and Wen, Sheng and Xiang, Yang},
    title     = {{BoostSLT}: Boosting Sign Language Translation via a Plug-and-Play Diffusion-Based Semantic Enhancer},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {28828--28837}
}
OpenVO: Open-World Visual Odometry with Temporal Dynamics Awareness: Phuc Nguyen,

Anh N. Nhu,

Ming C. Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
    author    = {Nguyen, Phuc and Nhu, Anh N. and Lin, Ming C.},
    title     = {{OpenVO}: Open-World Visual Odometry with Temporal Dynamics Awareness},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {14208--14218}
}
Learning and Aligning Click-Aware Shape Prior for Interactive Amodal Instance Segmentation: Junjie Chen,

Junwei Lin,

Ren Hong,

Shengjie Liu,

Yuming Fang,

Feng Qian,

Yifan Zuo; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Junjie and Lin, Junwei and Hong, Ren and Liu, Shengjie and Fang, Yuming and Qian, Feng and Zuo, Yifan},
    title     = {Learning and Aligning Click-Aware Shape Prior for Interactive Amodal Instance Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {20478--20487}
}
Hg-I2P: Bridging Modalities for Generalizable Image-to-Point-Cloud Registration via Heterogeneous Graphs: Pei An,

Junfeng Ding,

Jiaqi Yang,

Yulong Wang,

Jie Ma,

Liangliang Nan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2026_CVPR,
    author    = {An, Pei and Ding, Junfeng and Yang, Jiaqi and Wang, Yulong and Ma, Jie and Nan, Liangliang},
    title     = {{Hg-I2P}: Bridging Modalities for Generalizable Image-to-Point-Cloud Registration via Heterogeneous Graphs},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {39042--39051}
}
A Multi-Agent Perception-Action Alliance for Efficient Long Video Reasoning: Yichang Xu,

Gaowen Liu,

Ramana Rao Kompella,

Tiansheng Huang,

Sihao Hu,

Fatih Ilhan,

Selim Furkan Tekin,

Zachary Yahn,

Ling Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Yichang and Liu, Gaowen and Kompella, Ramana Rao and Huang, Tiansheng and Hu, Sihao and Ilhan, Fatih and Tekin, Selim Furkan and Yahn, Zachary and Liu, Ling},
    title     = {A Multi-Agent Perception-Action Alliance for Efficient Long Video Reasoning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {19497--19507}
}
UniEdit-I: Training-free Image Editing for Unified VLM via Iterative Understanding, Editing and Verifying: Chengyu Bai,

Jintao Chen,

Xiang Bai,

Yilong Chen,

Qi She,

Ming Lu,

Shanghang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR,
    author    = {Bai, Chengyu and Chen, Jintao and Bai, Xiang and Chen, Yilong and She, Qi and Lu, Ming and Zhang, Shanghang},
    title     = {{UniEdit-I}: Training-free Image Editing for Unified {VLM} via Iterative Understanding, Editing and Verifying},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {29750--29759}
}
ZOO-Prune: Training-Free Token Pruning via Zeroth-Order Gradient Estimation in Vision-Language Models: Youngeun Kim,

Youjia Zhang,

Huiling Liu,

Aecheon Jung,

Sunwoo Lee,

Sungeun Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
    author    = {Kim, Youngeun and Zhang, Youjia and Liu, Huiling and Jung, Aecheon and Lee, Sunwoo and Hong, Sungeun},
    title     = {{ZOO-Prune}: Training-Free Token Pruning via Zeroth-Order Gradient Estimation in Vision-Language Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {39572--39582}
}
OntoAug: Rethinking Generative Data Augmentation via Ontology Guidance: Shuo Wang,

Zhichuan Wang,

Jun Luo; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Shuo and Wang, Zhichuan and Luo, Jun},
    title     = {{OntoAug}: Rethinking Generative Data Augmentation via Ontology Guidance},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {22519--22528}
}
EnergyAction: Unimanual to Bimanual Composition with Energy-Based Models: Mingchen Song,

Xiang Deng,

Jie Wei,

Dongmei Jiang,

Liqiang Nie,

Weili Guan; [pdf] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
    author    = {Song, Mingchen and Deng, Xiang and Wei, Jie and Jiang, Dongmei and Nie, Liqiang and Guan, Weili},
    title     = {{EnergyAction}: Unimanual to Bimanual Composition with Energy-Based Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {20845--20855}
}
Immunizing Models Against Harmful Long-Horizon Fine-Tuning via Contractive Optimization Dynamics: Najibul Haque Sarker,

Zaber Ibn Abdul Hakim,

Ali Asgarov,

Chia-Wei Tang,

Alvi Md Ishmam,

Chris Thomas; [pdf] [supp]
[bibtex]
@InProceedings{Sarker_2026_CVPR,
    author    = {Sarker, Najibul Haque and Ibn Abdul Hakim, Zaber and Asgarov, Ali and Tang, Chia-Wei and Ishmam, Alvi Md and Thomas, Chris},
    title     = {Immunizing Models Against Harmful Long-Horizon Fine-Tuning via Contractive Optimization Dynamics},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {34940--34949}
}
Chain-of-Thought Guided Multi-Modal Object Re-Identification: Ya Gao,

Shihao Li,

Zhaojun Liu,

Aihua Zheng,

Chenglong Li,

Jin Tang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
    author    = {Gao, Ya and Li, Shihao and Liu, Zhaojun and Zheng, Aihua and Li, Chenglong and Tang, Jin},
    title     = {Chain-of-Thought Guided Multi-Modal Object Re-Identification},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {37705--37714}
}
MimicTalker: A Multimodal Interactive and Memory-Enhanced Framework for Real-Time Dyadic 3D Head Generation: Yinuo Wang,

Yanbo Fan,

Xuan Wang,

Boyao Zhou,

Yu Guo,

Yujun Shen,

Fei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yinuo and Fan, Yanbo and Wang, Xuan and Zhou, Boyao and Guo, Yu and Shen, Yujun and Wang, Fei},
    title     = {{MimicTalker}: A Multimodal Interactive and Memory-Enhanced Framework for Real-Time Dyadic {3D} Head Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {32399--32409}
}
S2D: Sparse to Dense Lifting for 3D Reconstruction with Minimal Inputs: Yuzhou Ji,

Qijian Tian,

He Zhu,

Xiaoqi Jiang,

Guangzhi Cao,

Lizhuang Ma,

Yuan Xie,

Xin Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2026_CVPR,
    author    = {Ji, Yuzhou and Tian, Qijian and Zhu, He and Jiang, Xiaoqi and Cao, Guangzhi and Ma, Lizhuang and Xie, Yuan and Tan, Xin},
    title     = {{S2D}: Sparse to Dense Lifting for {3D} Reconstruction with Minimal Inputs},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {7491--7502}
}
From Rays to Projections: Better Inputs for Feed-Forward View Synthesis: Zirui Wu,

Zeren Jiang,

Martin R. Oswald,

Jie Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Zirui and Jiang, Zeren and Oswald, Martin R. and Song, Jie},
    title     = {From Rays to Projections: Better Inputs for Feed-Forward View Synthesis},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {29012--29022}
}
NexusFlow: Unifying Disparate Tasks under Partial Supervision via Invertible Flow Networks: Fangzhou Lin,

Yuping Wang,

Yuliang Guo,

Zixun Huang,

Xinyu Huang,

Haichong Zhang,

Kazunori Yamada,

Zhengzhong Tu,

Liu Ren,

Ziming Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
    author    = {Lin, Fangzhou and Wang, Yuping and Guo, Yuliang and Huang, Zixun and Huang, Xinyu and Zhang, Haichong and Yamada, Kazunori and Tu, Zhengzhong and Ren, Liu and Zhang, Ziming},
    title     = {{NexusFlow}: Unifying Disparate Tasks under Partial Supervision via Invertible Flow Networks},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {3761--3771}
}
Exploring Spatiotemporal Feature Propagation for Video-Level Compressive Spectral Reconstruction: Dataset, Model and Benchmark: Lijing Cai,

Zhan Shi,

Chenglong Huang,

Jinyao Wu,

Qiping Li,

Zikang Huo,

Linsen Chen,

Chongde Zi,

Xun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
    author    = {Cai, Lijing and Shi, Zhan and Huang, Chenglong and Wu, Jinyao and Li, Qiping and Huo, Zikang and Chen, Linsen and Zi, Chongde and Cao, Xun},
    title     = {Exploring Spatiotemporal Feature Propagation for Video-Level Compressive Spectral Reconstruction: Dataset, Model and Benchmark},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {12522--12532}
}
MDS-VQA: Model-Informed Data Selection for Video Quality Assessment: Jian Zou,

Xiaoyu Xu,

Zhihua Wang,

Yilin Wang,

Balu Adsumilli,

Kede Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zou_2026_CVPR,
    author    = {Zou, Jian and Xu, Xiaoyu and Wang, Zhihua and Wang, Yilin and Adsumilli, Balu and Ma, Kede},
    title     = {{MDS-VQA}: Model-Informed Data Selection for Video Quality Assessment},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {22713--22722}
}
Aligning Multi-Character Narrative Image Generation with Multi-Aspect Human Preferences: Ziyi Gao,

Zhipeng Wei,

Jingjing Chen,

Zhiyu Tan,

Hao Li,

Yi-Ping Phoebe Chen; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
    author    = {Gao, Ziyi and Wei, Zhipeng and Chen, Jingjing and Tan, Zhiyu and Li, Hao and Chen, Yi-Ping Phoebe},
    title     = {Aligning Multi-Character Narrative Image Generation with Multi-Aspect Human Preferences},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {29244--29253}
}
SEBA: Sample-Efficient Black-Box Attacks on Visual Reinforcement Learning: Tairan Huang,

Yulin Jin,

Junxu Liu,

Qingqing Ye,

Haibo Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Tairan and Jin, Yulin and Liu, Junxu and Ye, Qingqing and Hu, Haibo},
    title     = {{SEBA}: Sample-Efficient Black-Box Attacks on Visual Reinforcement Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {27861--27871}
}
PP-Brep: Few-Shot B-rep Classification with Hybrid Graph Representation: Jiacheng Hao,

Chunying Liu,

Hao Guo,

Ruohan Wang,

Hongping Gan,

Yilei Shi; [pdf] [supp]
[bibtex]
@InProceedings{Hao_2026_CVPR,
    author    = {Hao, Jiacheng and Liu, Chunying and Guo, Hao and Wang, Ruohan and Gan, Hongping and Shi, Yilei},
    title     = {{PP-Brep}: Few-Shot {B-rep} Classification with Hybrid Graph Representation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {41616--41625}
}
Multimodal Semantic Bias Mitigation for Diverse Text-To-3D Generation: Yukuan Min,

Muli Yang,

Jinhao Zhang,

Yuxuan Wang,

Yihang Zhu,

Jiexi Yan,

Cheng Deng; [pdf] [supp]
[bibtex]
@InProceedings{Min_2026_CVPR,
    author    = {Min, Yukuan and Yang, Muli and Zhang, Jinhao and Wang, Yuxuan and Zhu, Yihang and Yan, Jiexi and Deng, Cheng},
    title     = {Multimodal Semantic Bias Mitigation for Diverse Text-To-{3D} Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {14779--14788}
}
Mitigating Instance Entanglement in Instance-Dependent Partial Label Learning: Rui Zhao,

Bin Shi,

Kai Sun,

Bo Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
    author    = {Zhao, Rui and Shi, Bin and Sun, Kai and Dong, Bo},
    title     = {Mitigating Instance Entanglement in Instance-Dependent Partial Label Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {39659--39668}
}
STUR3D: Spatio-Temporal Unified Representation Learning for 3D Object Detection: Huijie Fan,

Pengrui Huang,

Qiang Wang,

Baojie Fan,

Jiahua Dong,

Liangqiong Qu; [pdf]
[bibtex]
@InProceedings{Fan_2026_CVPR,
    author    = {Fan, Huijie and Huang, Pengrui and Wang, Qiang and Fan, Baojie and Dong, Jiahua and Qu, Liangqiong},
    title     = {{STUR3D}: Spatio-Temporal Unified Representation Learning for {3D} Object Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {33068--33077}
}
InterRVOS: Interaction-Aware Referring Video Object Segmentation: Woojeong Jin,

Seongchan Kim,

Jaeho Lee,

Seungryong Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
    author    = {Jin, Woojeong and Kim, Seongchan and Lee, Jaeho and Kim, Seungryong},
    title     = {{InterRVOS}: Interaction-Aware Referring Video Object Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {10367--10376}
}
VideoWeaver: Multimodal Multi-View Video-to-Video Transfer for Embodied Agents: George Eskandar,

Fengyi Shen,

Mohammad Altillawi,

Dong Chen,

Yang Bai,

Liudi Yang,

Ziyuan Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Eskandar_2026_CVPR,
    author    = {Eskandar, George and Shen, Fengyi and Altillawi, Mohammad and Chen, Dong and Bai, Yang and Yang, Liudi and Liu, Ziyuan},
    title     = {{VideoWeaver}: Multimodal Multi-View Video-to-Video Transfer for Embodied Agents},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {29620--29630}
}
TopoSlide: Topologically-Informed Histopathology Whole Slide Image Representation Learning: Shahira Abousamra,

Asmita Sood,

Sylvia Plevritis; [pdf] [supp]
[bibtex]
@InProceedings{Abousamra_2026_CVPR,
    author    = {Abousamra, Shahira and Sood, Asmita and Plevritis, Sylvia},
    title     = {{TopoSlide}: Topologically-Informed Histopathology Whole Slide Image Representation Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {13691--13701}
}
ResDiT: Evoking the Intrinsic Resolution Scalability in Diffusion Transformers: Yiyang Ma,

Feng Zhou,

Xuedan Yin,

Pu Cao,

Yonghao Dang,

Jianqin Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
    author    = {Ma, Yiyang and Zhou, Feng and Yin, Xuedan and Cao, Pu and Dang, Yonghao and Yin, Jianqin},
    title     = {{ResDiT}: Evoking the Intrinsic Resolution Scalability in Diffusion Transformers},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {40612--40621}
}
REACH: Explicit Recovery Behavior for Diffusion Policies: Zundong Ke,

Junlin Chen,

Jiayi Zhu,

Kuanhao Xia,

Boyi Zhao,

Jiayuan Gu; [pdf] [supp]
[bibtex]
@InProceedings{Ke_2026_CVPR,
    author    = {Ke, Zundong and Chen, Junlin and Zhu, Jiayi and Xia, Kuanhao and Zhao, Boyi and Gu, Jiayuan},
    title     = {{REACH}: Explicit Recovery Behavior for Diffusion Policies},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {38498--38508}
}
Same Content, Different Answers: Cross-Modal Inconsistency in MLLMs: Angela van Sprang,

Laurens Samson,

Ana Lucic,

Erman Acar,

Sennay Ghebreab,

Yuki M. Asano; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{van_Sprang_2026_CVPR,
    author    = {van Sprang, Angela and Samson, Laurens and Lucic, Ana and Acar, Erman and Ghebreab, Sennay and Asano, Yuki M.},
    title     = {Same Content, Different Answers: Cross-Modal Inconsistency in {MLLMs}},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {8781--8790}
}
LNEM: Lunar Neural Elevation Model: Suwan Lee,

Jo Ryeong Yim,

Kibaek Park,

Dong-Gyu Kim,

Eunhyeuk Kim,

Minsup Jeong,

Chae Kyung Sim,

Seokju Lee; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, Suwan and Yim, Jo Ryeong and Park, Kibaek and Kim, Dong-Gyu and Kim, Eunhyeuk and Jeong, Minsup and Sim, Chae Kyung and Lee, Seokju},
    title     = {{LNEM}: Lunar Neural Elevation Model},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {6508--6517}
}
Omni IIE Bench: Benchmarking the Practical Capabilities of Image Editing Models: Yujia Yang,

Yuanxiang Wang,

Zhenyu Guan,

Tiankun Yang,

Chenxi Bao,

Haopeng Jin,

Jinwen Luo,

Xinyu Zuo,

Lisheng Duan,

Haijin Liang,

Jin Ma,

Xinming Wang,

Ruiwen Tao,

Hongzhu Yi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Yujia and Wang, Yuanxiang and Guan, Zhenyu and Yang, Tiankun and Bao, Chenxi and Jin, Haopeng and Luo, Jinwen and Zuo, Xinyu and Duan, Lisheng and Liang, Haijin and Ma, Jin and Wang, Xinming and Tao, Ruiwen and Yi, Hongzhu},
    title     = {Omni {IIE} Bench: Benchmarking the Practical Capabilities of Image Editing Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {1089--1099}
}
VibeToken: Scaling 1D Image Tokenizers and Autoregressive Models for Dynamic Resolution Generations: Maitreya Patel,

Jingtao Li,

Weiming Zhuang,

Yezhou Yang,

Lingjuan Lv; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Patel_2026_CVPR,
    author    = {Patel, Maitreya and Li, Jingtao and Zhuang, Weiming and Yang, Yezhou and Lv, Lingjuan},
    title     = {{VibeToken}: Scaling {1D} Image Tokenizers and Autoregressive Models for Dynamic Resolution Generations},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {2058--2068}
}
Tracking by Predicting 3-D Gaussians Over Time: Tanish Baranwal,

Himanshu Gaurav Singh,

Jathushan Rajasegaran,

Jitendra Malik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Baranwal_2026_CVPR,
    author    = {Baranwal, Tanish and Singh, Himanshu Gaurav and Rajasegaran, Jathushan and Malik, Jitendra},
    title     = {Tracking by Predicting {3-D} {Gaussians} Over Time},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {42527--42537}
}
TokenSplat: Token-aligned 3D Gaussian Splatting for Feed-forward Pose-free Reconstruction: Yihui Li,

Chengxin Lv,

Zichen Tang,

Hongyu Yang,

Di Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Yihui and Lv, Chengxin and Tang, Zichen and Yang, Hongyu and Huang, Di},
    title     = {{TokenSplat}: Token-aligned {3D} {Gaussian} Splatting for Feed-forward Pose-free Reconstruction},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {40886--40895}
}
Iris: Bringing Real-World Priors into Diffusion Model for Monocular Depth Estimation: Xinhao Cai,

Gensheng Pei,

Zeren Sun,

Yazhou Yao,

Fumin Shen,

Wenguan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
    author    = {Cai, Xinhao and Pei, Gensheng and Sun, Zeren and Yao, Yazhou and Shen, Fumin and Wang, Wenguan},
    title     = {Iris: Bringing Real-World Priors into Diffusion Model for Monocular Depth Estimation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {26909--26919}
}
Diagnose, Correct, and Learn from Manipulation Failures via Visual Symbols: Xianchao Zeng,

Xinyu Zhou,

Youcheng Li,

Jiayou Shi,

Tianle Li,

Liangming Chen,

Lei Ren,

Yong-Lu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
    author    = {Zeng, Xianchao and Zhou, Xinyu and Li, Youcheng and Shi, Jiayou and Li, Tianle and Chen, Liangming and Ren, Lei and Li, Yong-Lu},
    title     = {Diagnose, Correct, and Learn from Manipulation Failures via Visual Symbols},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {42386--42395}
}
CAD-Refiner: A Unified Framework for CAD Generation and Iterative Editing: Meng Yuan,

Dawei Lin,

Hongxia Xie,

Tieru Wu,

Rui Ma; [pdf] [supp]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
    author    = {Yuan, Meng and Lin, Dawei and Xie, Hongxia and Wu, Tieru and Ma, Rui},
    title     = {{CAD-Refiner}: A Unified Framework for {CAD} Generation and Iterative Editing},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {3244--3253}
}
RewardFlow: Generate Images by Optimizing What You Reward: Onkar Susladkar,

Dong-Hwan Jang,

Tushar Prakash,

Adheesh Juvekar,

Vedant Shah,

Ayush Barik,

Nabeel Bashir,

Muntasir Wahed,

Ritish Shrirao,

Ismini Lourentzou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Susladkar_2026_CVPR,
    author    = {Susladkar, Onkar and Jang, Dong-Hwan and Prakash, Tushar and Juvekar, Adheesh and Shah, Vedant and Barik, Ayush and Bashir, Nabeel and Wahed, Muntasir and Shrirao, Ritish and Lourentzou, Ismini},
    title     = {{RewardFlow}: Generate Images by Optimizing What You Reward},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {20066--20076}
}
PAS: Prelim Attention Score for Detecting Object Hallucinations in Large Vision-Language Models: Nhat Hoang,

Minh Vu,

My T. Thai,

Manish Bhattarai; [pdf] [supp]
[bibtex]
@InProceedings{Hoang_2026_CVPR,
    author    = {Hoang, Nhat and Vu, Minh and Thai, My T. and Bhattarai, Manish},
    title     = {{PAS}: Prelim Attention Score for Detecting Object Hallucinations in Large Vision-Language Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {18273--18283}
}
HOPS: Hierarchical Open-vocabulary Part Segmentation with Attention-Aware Filtering and Affinity-Guided Enhancement: Xinlong Li,

Di Lin,

Shaoyiyi Gao,

Yaxuan Liu,

Jixian He,

Jiaxin Li,

Ruonan Liu,

Qing Guo,

Kairui Yang,

Wei Feng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Xinlong and Lin, Di and Gao, Shaoyiyi and Liu, Yaxuan and He, Jixian and Li, Jiaxin and Liu, Ruonan and Guo, Qing and Yang, Kairui and Feng, Wei},
    title     = {{HOPS}: Hierarchical Open-vocabulary Part Segmentation with Attention-Aware Filtering and Affinity-Guided Enhancement},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {27719--27729}
}
Captain Safari: A World Engine with Pose-Aligned 3D Memory: Yu-Cheng Chou,

Xingrui Wang,

Yitong Li,

Jiahao Wang,

Hanting Liu,

Cihang Xie,

Alan Yuille,

Junfei Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chou_2026_CVPR,
    author    = {Chou, Yu-Cheng and Wang, Xingrui and Li, Yitong and Wang, Jiahao and Liu, Hanting and Xie, Cihang and Yuille, Alan and Xiao, Junfei},
    title     = {{Captain Safari}: A World Engine with Pose-Aligned {3D} Memory},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {25347--25357}
}
When LoRA Betrays: Backdooring Text-to-Image Models by Masquerading as Benign Adapters: Liangwei Lyu,

Jiaqi Xu,

Jianwei Ding,

Qiyao Deng; [pdf] [arXiv]
[bibtex]
@InProceedings{Lyu_2026_CVPR,
    author    = {Lyu, Liangwei and Xu, Jiaqi and Ding, Jianwei and Deng, Qiyao},
    title     = {When {LoRA} Betrays: Backdooring Text-to-Image Models by Masquerading as Benign Adapters},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {8577--8586}
}
When Understanding Becomes a Risk: Authenticity and Safety Risks in the Emerging Image Generation Paradigm: Ye Leng,

Junjie Chu,

Mingjie Li,

Chenhao Lin,

Chao Shen,

Michael Backes,

Yun Shen,

Yang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Leng_2026_CVPR,
    author    = {Leng, Ye and Chu, Junjie and Li, Mingjie and Lin, Chenhao and Shen, Chao and Backes, Michael and Shen, Yun and Zhang, Yang},
    title     = {When Understanding Becomes a Risk: Authenticity and Safety Risks in the Emerging Image Generation Paradigm},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {39372--39382}
}
PAMotion: Physics-Aware Motion Generation for Full-Body Interaction with Multiple Objects: Yan Di,

Yuheng Li,

Yaoxing Wang,

Mengge Liu,

Shan Gao,

Xiangyang Ji; [pdf] [supp]
[bibtex]
@InProceedings{Di_2026_CVPR,
  author    = {Di, Yan and Li, Yuheng and Wang, Yaoxing and Liu, Mengge and Gao, Shan and Ji, Xiangyang},
  title     = {{PAMotion}: Physics-Aware Motion Generation for Full-Body Interaction with Multiple Objects},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30661--30674},
}
Streaming Video Crime Anticipation with Spatio-Temporal Causal Reasoning: Yusong Wang,

Zheyuan Gu,

Keyu Mao,

Minghao Shao,

Mingkun Xu,

Prayag Tiwari,

Jiawei Shao,

Qingsong Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yusong and Gu, Zheyuan and Mao, Keyu and Shao, Minghao and Xu, Mingkun and Tiwari, Prayag and Shao, Jiawei and Zhao, Qingsong},
  title     = {Streaming Video Crime Anticipation with Spatio-Temporal Causal Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16933--16943},
}
TopoMA: Topology-Guided Multi-Agent Dense RGB 3D Reconstruction via Distributed Inference: Xuanxuan Zhang,

ShuHui Shi,

Tianxiang Zhang,

Zhetao Guo,

Huang Zixuan,

You Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xuanxuan and Shi, ShuHui and Zhang, Tianxiang and Guo, Zhetao and Zixuan, Huang and Li, You},
  title     = {{TopoMA}: Topology-Guided Multi-Agent Dense {RGB} {3D} Reconstruction via Distributed Inference},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21784--21793},
}
Rejection Mixing: Fast Semantic Propagation of Mask Tokens for Efficient DLLM Inference: Yushi Ye,

Feng Hong,

Huangjie Zheng,

Xu Chen,

Zhiyong Chen,

Yanfeng Wang,

Jiangchao Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Yushi and Hong, Feng and Zheng, Huangjie and Chen, Xu and Chen, Zhiyong and Wang, Yanfeng and Yao, Jiangchao},
  title     = {Rejection Mixing: Fast Semantic Propagation of Mask Tokens for Efficient {DLLM} Inference},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17619--17629},
}
Bootstrapping Video Semantic Segmentation Model via Distillation-assisted Test-Time Adaptation: Jihun Kim,

Hoyong Kwon,

Hyeokjun Kweon,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jihun and Kwon, Hoyong and Kweon, Hyeokjun and Yoon, Kuk-Jin},
  title     = {Bootstrapping Video Semantic Segmentation Model via Distillation-assisted Test-Time Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10766--10777},
}
ICTPolarReal: A Polarized Reflection and Material Dataset of Real World Objects: Jing Yang,

Krithika Dharanikota,

Emily Jia,

Haiwei Chen,

Yajie Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Jing and Dharanikota, Krithika and Jia, Emily and Chen, Haiwei and Zhao, Yajie},
  title     = {{ICTPolarReal}: A Polarized Reflection and Material Dataset of Real World Objects},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6518--6527},
}
Scaling View Synthesis Transformers: Evan Kim,

Hyunwoo Ryu,

Thomas W. Mitchel,

Vincent Sitzmann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Evan and Ryu, Hyunwoo and Mitchel, Thomas W. and Sitzmann, Vincent},
  title     = {Scaling View Synthesis Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28893--28902},
}
Multi-modal Test-time Adaptation via Adaptive Probabilistic Gaussian Calibration: Jinglin Xu,

Yi Li,

Chuxiong Sun,

Xiao Xu,

Jiangmeng Li,

Fanjiang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jinglin and Li, Yi and Sun, Chuxiong and Xu, Xiao and Li, Jiangmeng and Xu, Fanjiang},
  title     = {Multi-modal Test-time Adaptation via Adaptive Probabilistic {Gaussian} Calibration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30268--30277},
}
Towards Photorealistic and Efficient Bokeh Rendering via Diffusion Framework: Linxiao Shi,

Siming Zheng,

Zerong Wang,

Hao Zhang,

Jinwei Chen,

Bo Li,

Shifeng Chen,

Peng-Tao Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Linxiao and Zheng, Siming and Wang, Zerong and Zhang, Hao and Chen, Jinwei and Li, Bo and Chen, Shifeng and Jiang, Peng-Tao},
  title     = {Towards Photorealistic and Efficient Bokeh Rendering via Diffusion Framework},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {231--240},
}
Cross-Domain Few-Shot Segmentation via Multi-view Progressive Adaptation: Jiahao Nie,

Guanqiao Fu,

Wenbin An,

Yap-Peng Tan,

Alex C. Kot,

Shijian Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nie_2026_CVPR,
  author    = {Nie, Jiahao and Fu, Guanqiao and An, Wenbin and Tan, Yap-Peng and Kot, Alex C. and Lu, Shijian},
  title     = {Cross-Domain Few-Shot Segmentation via Multi-view Progressive Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41594--41604},
}
No Hard Negatives Required: Concept Centric Learning Leads to Compositionality without Degrading Zero-shot Capabilities of Contrastive Models: Hai X. Pham,

David T. Hoffmann,

Ricardo Guerrero,

Brais Martinez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pham_2026_CVPR,
  author    = {Pham, Hai X. and Hoffmann, David T. and Guerrero, Ricardo and Martinez, Brais},
  title     = {No Hard Negatives Required: Concept Centric Learning Leads to Compositionality without Degrading Zero-shot Capabilities of Contrastive Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33901--33910},
}
V2U4Real: A Real-world Large-scale Dataset for Vehicle-to-UAV Cooperative Perception: Weijia Li,

Haoen Xiang,

Tianxu Wang,

Shuaibing Wu,

Qiming Xia,

Cheng Wang,

Chenglu Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Weijia and Xiang, Haoen and Wang, Tianxu and Wu, Shuaibing and Xia, Qiming and Wang, Cheng and Wen, Chenglu},
  title     = {{V2U4Real}: A Real-world Large-scale Dataset for Vehicle-to-{UAV} Cooperative Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4728--4737},
}
Think-Then-Generate: Structural Chain-of-Thought Reasoning for Consistent 3D Generation: Xinyue Liu,

Jin Liu,

Hongbo Wang,

Ran He,

Huaibo Huang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xinyue and Liu, Jin and Wang, Hongbo and He, Ran and Huang, Huaibo},
  title     = {Think-Then-Generate: Structural Chain-of-Thought Reasoning for Consistent {3D} Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34270--34280},
}
CCF: Complementary Collaborative Fusion for Domain Generalized Multi-Modal 3D Object Detection: Yuchen Wu,

Kun Wang,

Yining Pan,

Na Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yuchen and Wang, Kun and Pan, Yining and Zhao, Na},
  title     = {{CCF}: Complementary Collaborative Fusion for Domain Generalized Multi-Modal {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18745--18754},
}
Hilbert-Geo: Solving Solid Geometric Problems by Neural-Symbolic Reasoning: Ruoran Xu,

Haoyu Cheng,

Bin Dong,

Qiufeng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Ruoran and Cheng, Haoyu and Dong, Bin and Wang, Qiufeng},
  title     = {{Hilbert-Geo}: Solving Solid Geometric Problems by Neural-Symbolic Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9658--9667},
}
PosterReward: Unlocking Accurate Evaluation for High-Quality Graphic Design Generation: Jianyu Lai,

Sixiang Chen,

Jialin Gao,

Hengyu Shi,

Zhongying Liu,

Fuxiang Zhai,

Junfeng Luo,

Xiaoming Wei,

Lujia Wang,

Lei Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Jianyu and Chen, Sixiang and Gao, Jialin and Shi, Hengyu and Liu, Zhongying and Zhai, Fuxiang and Luo, Junfeng and Wei, Xiaoming and Wang, Lujia and Zhu, Lei},
  title     = {{PosterReward}: Unlocking Accurate Evaluation for High-Quality Graphic Design Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7762--7772},
}
Structural-Semantic Perception for Diffusion-Guided Temporal Forgery Localization: Ligong Cao,

Yeting Guo,

Haoang Chi; [pdf]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Ligong and Guo, Yeting and Chi, Haoang},
  title     = {Structural-Semantic Perception for Diffusion-Guided Temporal Forgery Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35384--35393},
}
OpenT2M: No-frill Motion Generation with Open-source, Large-scale, High-quality Data: Bin Cao,

Sipeng Zheng,

Hao Luo,

Boyuan Li,

Jing Liu,

Zongqing Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Bin and Zheng, Sipeng and Luo, Hao and Li, Boyuan and Liu, Jing and Lu, Zongqing},
  title     = {{OpenT2M}: No-frill Motion Generation with Open-source, Large-scale, High-quality Data},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30640--30649},
}
PGR-Net: Prior-Guided ROI Reasoning Network for Brain Tumor MRI Segmentation: Jiacheng Lu,

Hui Ding,

Shiyu Zhang,

Guoping Huo; [pdf] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Jiacheng and Ding, Hui and Zhang, Shiyu and Huo, Guoping},
  title     = {{PGR-Net}: Prior-Guided {ROI} Reasoning Network for Brain Tumor {MRI} Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22816--22825},
}
GenMask: Adapting DiT for Segmentation via Direct Mask Generation: Yuhuan Yang,

Xianwei Zhuang,

Yuxuan Cai,

Chaofan Ma,

Shuai Bai,

Jiangchao Yao,

Ya Zhang,

Junyang Lin,

Yanfeng Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yuhuan and Zhuang, Xianwei and Cai, Yuxuan and Ma, Chaofan and Bai, Shuai and Yao, Jiangchao and Zhang, Ya and Lin, Junyang and Wang, Yanfeng},
  title     = {{GenMask}: Adapting {DiT} for Segmentation via Direct Mask Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20455--20467},
}
DPL: Decoupled Prototype Learning for Enhancing Robustness of Vision-Language Transformers to Missing Modalities: Jueqing Lu,

Yuanyuan Qi,

Xiaohao Yang,

Shuaicheng Niu,

Fucai Ke,

Shujie Zhou,

Wei Tan,

Jionghao Lin,

Wray Buntine,

Hamid Rezatofighi,

Lan Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Jueqing and Qi, Yuanyuan and Yang, Xiaohao and Niu, Shuaicheng and Ke, Fucai and Zhou, Shujie and Tan, Wei and Lin, Jionghao and Buntine, Wray and Rezatofighi, Hamid and Du, Lan},
  title     = {{DPL}: Decoupled Prototype Learning for Enhancing Robustness of Vision-Language Transformers to Missing Modalities},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39219--39229},
}
FedCART: Tackling Long-Tailed Distributions in Federated Adversarial Training via Classifier Refinement: Yuchen Qin,

Yizhi Zhou,

Junxiao Wang,

Xin Xie,

Heng Qi; [pdf]
[bibtex]
@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Yuchen and Zhou, Yizhi and Wang, Junxiao and Xie, Xin and Qi, Heng},
  title     = {{FedCART}: Tackling Long-Tailed Distributions in Federated Adversarial Training via Classifier Refinement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24557--24566},
}
STAvatar: Soft Binding and Temporal Density Control for Monocular 3D Head Avatars Reconstruction: Jiankuo Zhao,

Xiangyu Zhu,

Zidu Wang,

Zhen Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Jiankuo and Zhu, Xiangyu and Wang, Zidu and Lei, Zhen},
  title     = {{STAvatar}: Soft Binding and Temporal Density Control for Monocular {3D} Head Avatars Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10996--11005},
}
PiLoT: Neural Pixel-to-3D Registration for UAV-based Ego and Target Geo-localization: Xiaoya Cheng,

Long Wang,

Yan Liu,

Xinyi Liu,

Hanlin Tan,

Yu Liu,

Maojun Zhang,

Shen Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Xiaoya and Wang, Long and Liu, Yan and Liu, Xinyi and Tan, Hanlin and Liu, Yu and Zhang, Maojun and Yan, Shen},
  title     = {{PiLoT}: Neural Pixel-to-{3D} Registration for {UAV}-based Ego and Target Geo-localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5379--5388},
}
Reading Your Actions: Learning Generalizable Action Representations via Pre-training AEMG: Zhenghao Huang,

Huilin Yao,

Kaikai Wang,

Lin Shu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zhenghao and Yao, Huilin and Wang, Kaikai and Shu, Lin},
  title     = {Reading Your Actions: Learning Generalizable Action Representations via Pre-training {AEMG}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20108--20117},
}
Pano3DComposer: Feed-Forward Compositional 3D Scene Generation from Single Panoramic Image: Zidian Qiu,

Ancong Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Zidian and Wu, Ancong},
  title     = {{Pano3DComposer}: Feed-Forward Compositional {3D} Scene Generation from Single Panoramic Image},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5902--5911},
}
Toward Generalizable Whole Brain Representations with High-Resolution Light-Sheet Data: Minyoung E. Kim,

Dae Hee Yun,

Aditi V. Patel,

Madeline Hon,

Webster Guan,

Taegeon Lee,

Brian Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Minyoung E. and Yun, Dae Hee and Patel, Aditi V. and Hon, Madeline and Guan, Webster and Lee, Taegeon and Nguyen, Brian},
  title     = {Toward Generalizable Whole Brain Representations with High-Resolution Light-Sheet Data},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35270--35279},
}
Frequency-Aware Affinity for Weakly Supervised Semantic Segmentation: Ziqian Yang,

Xianglin Qiu,

Xinqiao Zhao,

Xiaolei Wang,

Quan Zhang,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Ziqian and Qiu, Xianglin and Zhao, Xinqiao and Wang, Xiaolei and Zhang, Quan and Xiao, Jimin},
  title     = {Frequency-Aware Affinity for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20468--20477},
}
Unified Personalized Understanding, Generating and Editing: Yu Zhong,

Tianwei Lin,

Ruike Zhu,

Yuqian Yuan,

Haoyu Zheng,

Liang Liang,

Wenqiao Zhang,

Feifei Shao,

Haoyuan Li,

Wanggui He,

Hao Jiang,

Yueting Zhuang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Yu and Lin, Tianwei and Zhu, Ruike and Yuan, Yuqian and Zheng, Haoyu and Liang, Liang and Zhang, Wenqiao and Shao, Feifei and Li, Haoyuan and He, Wanggui and Jiang, Hao and Zhuang, Yueting},
  title     = {Unified Personalized Understanding, Generating and Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29400--29409},
}
OpenVision 2: A Family of Generative Pretrained Visual Encoders for Multimodal Learning: Yanqing Liu,

Xianhang Li,

Letian Zhang,

Zirui Wang,

Zeyu Zheng,

Yuyin Zhou,

Cihang Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yanqing and Li, Xianhang and Zhang, Letian and Wang, Zirui and Zheng, Zeyu and Zhou, Yuyin and Xie, Cihang},
  title     = {{OpenVision} 2: A Family of Generative Pretrained Visual Encoders for Multimodal Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39164--39174},
}
OmniVTG: A Large-Scale Dataset and Training Paradigm for Open-World Video Temporal Grounding: Minghang Zheng,

Zihao Yin,

Yi Yang,

Yuxin Peng,

Yang Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Minghang and Yin, Zihao and Yang, Yi and Peng, Yuxin and Liu, Yang},
  title     = {{OmniVTG}: A Large-Scale Dataset and Training Paradigm for Open-World Video Temporal Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24620--24629},
}
Cut to the Chase: Training-free Multimodal Summarization via Chain-of-Events: Xiaoxing You,

Qiang Huang,

Lingyu Li,

Xiaojun Chang,

Jun Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2026_CVPR,
  author    = {You, Xiaoxing and Huang, Qiang and Li, Lingyu and Chang, Xiaojun and Yu, Jun},
  title     = {Cut to the Chase: Training-free Multimodal Summarization via Chain-of-Events},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26219--26229},
}
MGDHand: Multi-Granularity Prior-to-Inertial Distillation Framework for Sequential 3D Hand Pose Estimation from Sparse IMUs: Xinyi Wang,

Pengfei Ren,

Haoyang Zhang,

Hanling Zhan,

Yingxi Li,

Liang Xie,

Yue Gao,

Erwei Yin; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xinyi and Ren, Pengfei and Zhang, Haoyang and Zhan, Hanling and Li, Yingxi and Xie, Liang and Gao, Yue and Yin, Erwei},
  title     = {{MGDHand}: Multi-Granularity Prior-to-Inertial Distillation Framework for Sequential {3D} Hand Pose Estimation from Sparse {IMUs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13996--14005},
}
FailureAtlas: Mapping the Failure Landscape of T2I Models via Active Exploration: Muxi Chen,

Zhaohua Zhang,

Chenchen Zhao,

Mingyang Chen,

Wenyu Jiang,

Tianwen Jiang,

Jianhuan Zhuo,

Yu Tang,

Qiuyong Xiao,

Jihong Zhang,

Qiang Xu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Muxi and Zhang, Zhaohua and Zhao, Chenchen and Chen, Mingyang and Jiang, Wenyu and Jiang, Tianwen and Zhuo, Jianhuan and Tang, Yu and Xiao, Qiuyong and Zhang, Jihong and Xu, Qiang},
  title     = {{FailureAtlas}: Mapping the Failure Landscape of {T2I} Models via Active Exploration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40782--40791},
}
Saliency-Guided Representation with Consistency Policy Learning for Visual Unsupervised Reinforcement Learning: Jingbo Sun,

Qichao Zhang,

Songjun Tu,

Xing Fang,

Yupeng Zheng,

Haoran Li,

Ke Chen,

Dongbin Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Jingbo and Zhang, Qichao and Tu, Songjun and Fang, Xing and Zheng, Yupeng and Li, Haoran and Chen, Ke and Zhao, Dongbin},
  title     = {Saliency-Guided Representation with Consistency Policy Learning for Visual Unsupervised Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19508--19517},
}
I-Scene: 3D Instance Models are Implicit Generalizable Spatial Learners: Lu Ling,

Yunhao Ge,

Yichen Sheng,

Aniket Bera; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ling_2026_CVPR,
  author    = {Ling, Lu and Ge, Yunhao and Sheng, Yichen and Bera, Aniket},
  title     = {{I-Scene}: {3D} Instance Models are Implicit Generalizable Spatial Learners},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26974--26983},
}
When Pretty Isn't Useful: Investigating Why Modern Text-to-Image Models Fail as Reliable Training Data Generators: Krzysztof Adamkiewicz,

Brian B. Moser,

Stanislav Frolov,

Tobias Christian Nauen,

Federico Raue,

Andreas Dengel; [pdf] [supp]
[bibtex]
@InProceedings{Adamkiewicz_2026_CVPR,
  author    = {Adamkiewicz, Krzysztof and Moser, Brian B. and Frolov, Stanislav and Nauen, Tobias Christian and Raue, Federico and Dengel, Andreas},
  title     = {When Pretty Isn't Useful: Investigating Why Modern Text-to-Image Models Fail as Reliable Training Data Generators},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36660--36669},
}
Cleaning the Pool: Progressive Filtering of Unlabeled Pools in Deep Active Learning: Denis Huseljic,

Marek Herde,

Lukas Rauch,

Paul Hahn,

Bernhard Sick; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huseljic_2026_CVPR,
  author    = {Huseljic, Denis and Herde, Marek and Rauch, Lukas and Hahn, Paul and Sick, Bernhard},
  title     = {Cleaning the Pool: Progressive Filtering of Unlabeled Pools in Deep Active Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22238--22247},
}
Seeing Clearly, Reasoning Confidently: Plug-and-Play Remedies for Vision Language Model Blindness: Xin Hu,

Haomiao Ni,

Yunbei Zhang,

Jihun Hamm,

Zechen Li,

Zhengming Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Xin and Ni, Haomiao and Zhang, Yunbei and Hamm, Jihun and Li, Zechen and Ding, Zhengming},
  title     = {Seeing Clearly, Reasoning Confidently: Plug-and-Play Remedies for Vision Language Model Blindness},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18806--18815},
}
Thinking with Drafts: Speculative Temporal Reasoning for Efficient Long Video Understanding: Pengfei Hu,

Meng Cao,

Yingyao Wang,

Yi Wang,

Jiahua Dong,

Jun Song,

Yu Cheng,

Bo Zheng,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Pengfei and Cao, Meng and Wang, Yingyao and Wang, Yi and Dong, Jiahua and Song, Jun and Cheng, Yu and Zheng, Bo and Liang, Xiaodan},
  title     = {Thinking with Drafts: Speculative Temporal Reasoning for Efficient Long Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36255--36266},
}
Dataset Distillation by Influence Matching: Haoru Tan,

Wang Wang,

Sitong Wu,

Xiuzhe Wu,

Yang-Tian Sun,

Chirui Chang,

Shaofeng Zhang,

Xiaojuan Qi; [pdf]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Haoru and Wang, Wang and Wu, Sitong and Wu, Xiuzhe and Sun, Yang-Tian and Chang, Chirui and Zhang, Shaofeng and Qi, Xiaojuan},
  title     = {Dataset Distillation by Influence Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19654--19664},
}
Clinically-Grounded Counterfactual Reasoning for Medical Video Diagnosis: Jianzhe Gao,

Churan Wang,

Weiyi Zhang,

Jianghua Li,

Li-An Li,

Wenguan Wang,

Yixin Zhu,

Yizhou Wang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Jianzhe and Wang, Churan and Zhang, Weiyi and Li, Jianghua and Li, Li-An and Wang, Wenguan and Zhu, Yixin and Wang, Yizhou},
  title     = {Clinically-Grounded Counterfactual Reasoning for Medical Video Diagnosis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7014--7025},
}
RGB-Event based Pedestrian Attribute Recognition: A Benchmark Dataset and An Asymmetric RWKV Fusion Framework: Xiao Wang,

Haiyang Wang,

Shiao Wang,

Qiang Chen,

Jiandong Jin,

Haoyu Song,

Bo Jiang,

Chenglong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xiao and Wang, Haiyang and Wang, Shiao and Chen, Qiang and Jin, Jiandong and Song, Haoyu and Jiang, Bo and Li, Chenglong},
  title     = {{RGB}-Event based Pedestrian Attribute Recognition: A Benchmark Dataset and An Asymmetric {RWKV} Fusion Framework},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32745--32755},
}
Efficient Video Object Segmentation and Tracking with Recurrent Dynamic Submodel: Weidong Tang,

Zhiyuan Liang,

Xinyan Wan,

Chen Zhu,

Zhaopan Xu,

Pengfei Zhou,

Yan Song,

Yang You,

Wangbo Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Weidong and Liang, Zhiyuan and Wan, Xinyan and Zhu, Chen and Xu, Zhaopan and Zhou, Pengfei and Song, Yan and You, Yang and Zhao, Wangbo},
  title     = {Efficient Video Object Segmentation and Tracking with Recurrent Dynamic Submodel},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20912--20921},
}
Zero-Shot Depth Completion with Vision-Language Model: Zhiqiang Yan,

Yuan Wu,

Gim Hee Lee; [pdf]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Zhiqiang and Wu, Yuan and Lee, Gim Hee},
  title     = {Zero-Shot Depth Completion with Vision-Language Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19833--19843},
}
Towards Holistic Modeling for Video Frame Interpolation with Auto-regressive Diffusion Transformers: Xinyu Peng,

Han Li,

Yuyang Huang,

Ziyang Zheng,

Yaoming Wang,

Xin Chen,

Wenrui Dai,

Chenglin Li,

Junni Zou,

Hongkai Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Xinyu and Li, Han and Huang, Yuyang and Zheng, Ziyang and Wang, Yaoming and Chen, Xin and Dai, Wenrui and Li, Chenglin and Zou, Junni and Xiong, Hongkai},
  title     = {Towards Holistic Modeling for Video Frame Interpolation with Auto-regressive Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11448--11458},
}
R4: Retrieval-Augmented Reasoning for Vision-Language Models in 4D Spatio-Temporal Space: Tin Stribor Sohn,

Maximilian Dillitzer,

Jason J. Corso,

Eric Sax; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sohn_2026_CVPR,
  author    = {Sohn, Tin Stribor and Dillitzer, Maximilian and Corso, Jason J. and Sax, Eric},
  title     = {{R4}: Retrieval-Augmented Reasoning for Vision-Language Models in {4D} Spatio-Temporal Space},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38711--38721},
}
WildCap: Facial Albedo Capture in the Wild via Hybrid Inverse Rendering: Yuxuan Han,

Xin Ming,

Tianxiao Li,

Zhuofan Shen,

Qixuan Zhang,

Lan Xu,

Feng Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Yuxuan and Ming, Xin and Li, Tianxiao and Shen, Zhuofan and Zhang, Qixuan and Xu, Lan and Xu, Feng},
  title     = {{WildCap}: Facial Albedo Capture in the Wild via Hybrid Inverse Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10909--10920},
}
SAVA-X: Ego-to-Exo Imitation Error Detection via Scene-Adaptive View Alignment and Bidirectional Cross View Fusion: Xiang Li,

Heqian Qiu,

Lanxiao Wang,

Benliu Qiu,

Fanman Meng,

Linfeng Xu,

Hongliang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xiang and Qiu, Heqian and Wang, Lanxiao and Qiu, Benliu and Meng, Fanman and Xu, Linfeng and Li, Hongliang},
  title     = {{SAVA-X}: Ego-to-Exo Imitation Error Detection via Scene-Adaptive View Alignment and Bidirectional Cross View Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28062--28073},
}
SemanticVLA: Towards Semantic Reasoning over Action Memorization via Synergistic Explicit Trace and Latent Action Planning: Fei Ni,

Zhuo Chen,

Yifu Yuan,

Zibin Dong,

Xianze Yao,

Shan Luo,

Jianye Hao,

Jiankang Deng,

Stefanos Zafeiriou; [pdf] [supp]
[bibtex]
@InProceedings{Ni_2026_CVPR,
  author    = {Ni, Fei and Chen, Zhuo and Yuan, Yifu and Dong, Zibin and Yao, Xianze and Luo, Shan and Hao, Jianye and Deng, Jiankang and Zafeiriou, Stefanos},
  title     = {{SemanticVLA}: Towards Semantic Reasoning over Action Memorization via Synergistic Explicit Trace and Latent Action Planning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12237--12247},
}
AERGS-SLAM: Auto-Exposure-Robust Stereo 3D Gaussian Splatting SLAM: Zhiyu Zhou,

Feng Hui,

Yu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Zhiyu and Hui, Feng and Liu, Yu},
  title     = {{AERGS-SLAM}: Auto-Exposure-Robust Stereo {3D} {Gaussian} Splatting {SLAM}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40929--40938},
}
Photo3D: Advancing Photorealistic 3D Generation through Structure-Aligned Detail Enhancement: Xinyue Liang,

Zhiyuan Ma,

Lingchen Sun,

Yanjun Guo,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Xinyue and Ma, Zhiyuan and Sun, Lingchen and Guo, Yanjun and Zhang, Lei},
  title     = {{Photo3D}: Advancing Photorealistic {3D} Generation through Structure-Aligned Detail Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34237--34247},
}
Rosetta Stone For Unified MLLMs: A Unified Tokenizer to Decipher Understanding and Generation: Wenyu Sun,

Hufei Li,

Ruijin Jin,

Xiangheng Kong,

Yuning Jiang; [pdf]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Wenyu and Li, Hufei and Jin, Ruijin and Kong, Xiangheng and Jiang, Yuning},
  title     = {{Rosetta Stone} For Unified {MLLMs}: A Unified Tokenizer to Decipher Understanding and Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22964--22974},
}
Anomaly as Non-Conformity via Training-Free Graph Laplacian Energy Minimization: Jungwook Seo,

Minjeong Kim,

Younkwan Lee,

Seungho Shin,

Sungyong Baik; [pdf] [supp]
[bibtex]
@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Jungwook and Kim, Minjeong and Lee, Younkwan and Shin, Seungho and Baik, Sungyong},
  title     = {Anomaly as Non-Conformity via Training-Free Graph {Laplacian} Energy Minimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21336--21345},
}
TTL: Test-time Textual Learning for OOD Detection with Pretrained Vision-Language Models: Jinlun Ye,

Jiang Liao,

Runhe Lai,

Xinhua Lu,

Jiaxin Zhuang,

Zhiyong Gan,

Ruixuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Jinlun and Liao, Jiang and Lai, Runhe and Lu, Xinhua and Zhuang, Jiaxin and Gan, Zhiyong and Wang, Ruixuan},
  title     = {{TTL}: Test-time Textual Learning for {OOD} Detection with Pretrained Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27590--27599},
}
Towards Stable Self-Supervised Object Representations in Unconstrained Egocentric Video: Yuting Tan,

Xilong Cheng,

Yunxiao Qin,

Zhengnan Li,

Jingjing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Yuting and Cheng, Xilong and Qin, Yunxiao and Li, Zhengnan and Zhang, Jingjing},
  title     = {Towards Stable Self-Supervised Object Representations in Unconstrained Egocentric Video},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10545--10555},
}
Ultra-Low Bitrate Perceptual Image Compression with Shallow Encoder: Tianyu Zhang,

Dong Liu,

Chang Wen Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Tianyu and Liu, Dong and Chen, Chang Wen},
  title     = {Ultra-Low Bitrate Perceptual Image Compression with Shallow Encoder},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12118--12128},
}
FreqSIC: Frequency-aware Stereo Image Compression with Bi-directional Checkerboard Context Model: Shiyu Qin,

Yongkang Lu,

Yimin Zhou,

Jiawei Li,

Yifan Ren,

Yuerong Xue,

Shu-Tao Xia,

Bin Chen; [pdf] [supp]
[bibtex]
@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Shiyu and Lu, Yongkang and Zhou, Yimin and Li, Jiawei and Ren, Yifan and Xue, Yuerong and Xia, Shu-Tao and Chen, Bin},
  title     = {{FreqSIC}: Frequency-aware Stereo Image Compression with Bi-directional Checkerboard Context Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19393--19402},
}
T2SGrid: Temporal-to-Spatial Gridification for Video Temporal Grounding: Chaohong Guo,

Yihan He,

Yongwei Nie,

Fei Ma,

Xuemiao Xu,

Chengjiang Long; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Chaohong and He, Yihan and Nie, Yongwei and Ma, Fei and Xu, Xuemiao and Long, Chengjiang},
  title     = {{T2SGrid}: Temporal-to-Spatial Gridification for Video Temporal Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3443--3454},
}
LiREC-Net: A Target-Free and Learning-Based Network for LiDAR, RGB, and Event Calibration: Aditya Ranjan Dash,

Ramy Battrawy,

René Schuster,

Didier Stricker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dash_2026_CVPR,
  author    = {Dash, Aditya Ranjan and Battrawy, Ramy and Schuster, Ren{\'e} and Stricker, Didier},
  title     = {{LiREC-Net}: A Target-Free and Learning-Based Network for {LiDAR}, {RGB}, and Event Calibration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31421--31429},
}
OVI-MAP: Open-Vocabulary Instance-Semantic Mapping: Zilong Deng,

Federico Tombari,

Marc Pollefeys,

Johanna Wald,

Daniel Barath; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Zilong and Tombari, Federico and Pollefeys, Marc and Wald, Johanna and Barath, Daniel},
  title     = {{OVI-MAP}: Open-Vocabulary Instance-Semantic Mapping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12606--12616},
}
GDFA: Geometry-Driven Federated Unlearning with Directional Task Vector Alignment: Xiuting Weng,

Ruizhi Pu,

Yuanhang Yao,

Kun Yue,

Zhiwen Tang,

Lixing Yu; [pdf] [supp]
[bibtex]
@InProceedings{Weng_2026_CVPR,
  author    = {Weng, Xiuting and Pu, Ruizhi and Yao, Yuanhang and Yue, Kun and Tang, Zhiwen and Yu, Lixing},
  title     = {{GDFA}: Geometry-Driven Federated Unlearning with Directional Task Vector Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10346--10356},
}
What Do Visual Tokens Really Encode? Uncovering Sparsity and Redundancy in Multimodal Large Language Models: Yingqi Fan,

Junlong Tong,

Anhao Zhao,

Xiaoyu Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Yingqi and Tong, Junlong and Zhao, Anhao and Shen, Xiaoyu},
  title     = {What Do Visual Tokens Really Encode? Uncovering Sparsity and Redundancy in Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11987--11997},
}
Less is More: Data-Efficient Adaptation for Controllable Text-to-Video Generation: Shihan Cheng,

Nilesh Kulkarni,

David Hyde,

Dmitriy Smirnov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Shihan and Kulkarni, Nilesh and Hyde, David and Smirnov, Dmitriy},
  title     = {Less is More: Data-Efficient Adaptation for Controllable Text-to-Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14811--14821},
}
All in One: Unifying Deepfake Detection, Tampering Localization, and Source Tracing with a Robust Landmark-Identity Watermark: Junjiang Wu,

Liejun Wang,

Zhiqing Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Junjiang and Wang, Liejun and Guo, Zhiqing},
  title     = {All in One: Unifying Deepfake Detection, Tampering Localization, and Source Tracing with a Robust Landmark-Identity Watermark},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14106--14115},
}
Socratic-Geo: Synthetic Data Generation and Cross-Modal Geometric Reasoning via Multi-Agent Interaction: Zhengbo Jiao,

Zifan Zhang,

Shaobo Wang,

Wei Wang,

Bing Zhao,

Hu Wei,

Linfeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Jiao_2026_CVPR,
  author    = {Jiao, Zhengbo and Zhang, Zifan and Wang, Shaobo and Wang, Wei and Zhao, Bing and Wei, Hu and Zhang, Linfeng},
  title     = {{Socratic-Geo}: Synthetic Data Generation and Cross-Modal Geometric Reasoning via Multi-Agent Interaction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23795--23804},
}
Coordinate Denoising for Non-Equilibrium Molecular Representation Learning: Qianwei Tang,

Baile Xu,

Jian Zhao,

Furao Shen; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Qianwei and Xu, Baile and Zhao, Jian and Shen, Furao},
  title     = {Coordinate Denoising for Non-Equilibrium Molecular Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3584--3593},
}
Drainage: A Unifying Framework for Addressing Class Uncertainty: Yasser Taha,

Grégoire Montavon,

Nils Körber; [pdf] [supp]
[bibtex]
@InProceedings{Taha_2026_CVPR,
  author    = {Taha, Yasser and Montavon, Gr{\'e}goire and K{\"o}rber, Nils},
  title     = {Drainage: A Unifying Framework for Addressing Class Uncertainty},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41934--41943},
}
Learning Cross-View Object Correspondence via Cycle-Consistent Mask Prediction: Shannan Yan,

Leqi Zheng,

Keyu Lv,

Jingchen Ni,

Hongyang Wei,

Jiajun Zhang,

Guangting Wang,

Jing LYU,

Chun Yuan,

Fengyun Rao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Shannan and Zheng, Leqi and Lv, Keyu and Ni, Jingchen and Wei, Hongyang and Zhang, Jiajun and Wang, Guangting and LYU, Jing and Yuan, Chun and Rao, Fengyun},
  title     = {Learning Cross-View Object Correspondence via Cycle-Consistent Mask Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6653--6663},
}
Can We Build Scene Graphs, Not Classify Them? FlowSG: Progressive Image-Conditioned Scene Graph Generation with Flow Matching: Xin Hu,

Ke Qin,

Wen Yin,

Yuan-Fang Li,

Ming Li,

Tao He; [pdf] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Xin and Qin, Ke and Yin, Wen and Li, Yuan-Fang and Li, Ming and He, Tao},
  title     = {Can We Build Scene Graphs, Not Classify Them? {FlowSG}: Progressive Image-Conditioned Scene Graph Generation with Flow Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10208--10218},
}
SATTC: Structure-Aware Label-Free Test-Time Calibration for Cross-Subject EEG-to-Image Retrieval: Qunjie Huang,

Weina Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Qunjie and Zhu, Weina},
  title     = {{SATTC}: Structure-Aware Label-Free Test-Time Calibration for Cross-Subject {EEG}-to-Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16887--16896},
}
Restore Text First, Enhance Image Later: Two-Stage Scene Text Image Super-Resolution with Glyph Structure Guidance: Minxing Luo,

Linlong Fan,

Qiushi Wang,

Ge Wu,

Yiyan Luo,

Yuhang Yu,

Jinwei Chen,

Yaxing Wang,

Qingnan Fan,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Minxing and Fan, Linlong and Wang, Qiushi and Wu, Ge and Luo, Yiyan and Yu, Yuhang and Chen, Jinwei and Wang, Yaxing and Fan, Qingnan and Yang, Jian},
  title     = {Restore Text First, Enhance Image Later: Two-Stage Scene Text Image Super-Resolution with Glyph Structure Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30553--30563},
}
Lifelong Imitation Learning with Multimodal Latent Replay and Incremental Adjustment: Fanqi Yu,

Matteo Tiezzi,

Tommaso Apicella,

Cigdem Beyan,

Vittorio Murino; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Fanqi and Tiezzi, Matteo and Apicella, Tommaso and Beyan, Cigdem and Murino, Vittorio},
  title     = {Lifelong Imitation Learning with Multimodal Latent Replay and Incremental Adjustment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6740--6749},
}
Evo-Retriever: LLM-Guided Curriculum Evolution with Viewpoint-Pathway Collaboration for Multimodal Document Retrieval: Weiqing Li,

Jinyue Guo,

Yaqi Wang,

Haiyang Xiao,

Yuewei Zhang,

Guohua Liu,

Hao Henry Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Weiqing and Guo, Jinyue and Wang, Yaqi and Xiao, Haiyang and Zhang, Yuewei and Liu, Guohua and Wang, Hao Henry},
  title     = {{Evo-Retriever}: {LLM}-Guided Curriculum Evolution with Viewpoint-Pathway Collaboration for Multimodal Document Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31113--31123},
}
A Self-Conditioned Representation Guided Diffusion Model for Realistic Text-to-LiDAR Scene Generation: Wentao Qu,

Guofeng Mei,

Yang Wu,

YongShun Gong,

Xiaoshui Huang,

Liang Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2026_CVPR,
  author    = {Qu, Wentao and Mei, Guofeng and Wu, Yang and Gong, YongShun and Huang, Xiaoshui and Xiao, Liang},
  title     = {A Self-Conditioned Representation Guided Diffusion Model for Realistic Text-to-{LiDAR} Scene Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9434--9444},
}
Concept Regions Matter: Benchmarking CLIP with a New Cluster-Importance Approach: Aishwarya Agarwal,

Srikrishna Karanam,

Vineet Gandhi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Agarwal_2026_CVPR,
  author    = {Agarwal, Aishwarya and Karanam, Srikrishna and Gandhi, Vineet},
  title     = {Concept Regions Matter: Benchmarking {CLIP} with a New Cluster-Importance Approach},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2864--2874},
}
Fine-Grained Multi Image Object Hallucination Benchmark: Joonki Min,

Chaeyun Kim,

Hyungwook Choi,

Yejin Kim,

Kihyun Kim,

Yohan Jo,

Joonseok Lee; [pdf] [supp]
[bibtex]
@InProceedings{Min_2026_CVPR,
  author    = {Min, Joonki and Kim, Chaeyun and Choi, Hyungwook and Kim, Yejin and Kim, Kihyun and Jo, Yohan and Lee, Joonseok},
  title     = {Fine-Grained Multi Image Object Hallucination Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18295--18305},
}
MixerCSeg: An Efficient Mixer Architecture for Crack Segmentation via Decoupled Mamba Attention: Zilong Zhao,

Zhengming Ding,

Pei Niu,

Wenhao Sun,

Feng Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Zilong and Ding, Zhengming and Niu, Pei and Sun, Wenhao and Guo, Feng},
  title     = {{MixerCSeg}: An Efficient Mixer Architecture for Crack Segmentation via Decoupled {Mamba} Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17969--17978},
}
Co-Me: Confidence Guided Token Merging for Visual Geometric Transformers: Yutian Chen,

Yuheng Qiu,

Ruogu Li,

Jay Patrikar,

Sebastian Scherer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yutian and Qiu, Yuheng and Li, Ruogu and Patrikar, Jay and Scherer, Sebastian},
  title     = {{Co-Me}: Confidence Guided Token Merging for Visual Geometric Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14590--14599},
}
EagleNet: Energy-Aware Fine-Grained Relationship Learning Network for Text-Video Retrieval: Yuhan Chen,

Pengwen Dai,

Chuan Wang,

Dayan Wu,

Xiaochun Cao; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yuhan and Dai, Pengwen and Wang, Chuan and Wu, Dayan and Cao, Xiaochun},
  title     = {{EagleNet}: Energy-Aware Fine-Grained Relationship Learning Network for Text-Video Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23901--23911},
}
Local Precise Refinement: A Dual-Gated Mixture-of-Experts for Enhancing Foundation Model Generalization against Spectral Shifts: Xi Chen,

Maojun Zhang,

Yu Liu,

Shen Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xi and Zhang, Maojun and Liu, Yu and Yan, Shen},
  title     = {Local Precise Refinement: A Dual-Gated Mixture-of-Experts for Enhancing Foundation Model Generalization against Spectral Shifts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20521--20531},
}
Stereo World Model: Camera-Guided Stereo Video Generation: Yang-Tian Sun,

Zehuan Huang,

Yifan Niu,

Lin Ma,

Yan-Pei Cao,

Yuewen Ma,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Yang-Tian and Huang, Zehuan and Niu, Yifan and Ma, Lin and Cao, Yan-Pei and Ma, Yuewen and Qi, Xiaojuan},
  title     = {Stereo World Model: Camera-Guided Stereo Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18342--18353},
}
MotionEnhancer: Leveraging Video Diffusion for Motion-Enhanced Vision-Language Models: Yifan Xu,

Chao Zhang,

Ruifei Ma,

Fei Gao,

Zhifei Yang,

Jiaxing Qi,

Zhipeng Chen; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yifan and Zhang, Chao and Ma, Ruifei and Gao, Fei and Yang, Zhifei and Qi, Jiaxing and Chen, Zhipeng},
  title     = {{MotionEnhancer}: Leveraging Video Diffusion for Motion-Enhanced Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2778--2787},
}
OrionEdit: Bridging Reference and Source Images for Generalized Cross-Image Editing: Zeyu Jiang,

Lai Man Po,

Xuyuan Xu,

Yexin Wang,

Guoping Gong,

Haoxuan Wu,

Chenbo Yan,

Kun Li,

Yuyang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Zeyu and Po, Lai Man and Xu, Xuyuan and Wang, Yexin and Gong, Guoping and Wu, Haoxuan and Yan, Chenbo and Li, Kun and Liu, Yuyang},
  title     = {{OrionEdit}: Bridging Reference and Source Images for Generalized Cross-Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9127--9138},
}
Accelerating Streaming Video Large Language Models via Hierarchical Token Compression: Yiyu Wang,

Xuyang Liu,

Xiyan Gui,

Xinying Lin,

Boxue Yang,

Chenfei Liao,

Tailai Chen,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yiyu and Liu, Xuyang and Gui, Xiyan and Lin, Xinying and Yang, Boxue and Liao, Chenfei and Chen, Tailai and Zhang, Linfeng},
  title     = {Accelerating Streaming Video Large Language Models via Hierarchical Token Compression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18523--18533},
}
MS^2Gait: A Multi-Scale Spatio-Temporal Fusion Network for LiDAR-based Gait Recognition: Shenyin Xu,

Yishan Wang,

Xinyu Li,

Rui Liu,

Zhongyuan Wang,

Xin Tian; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Shenyin and Wang, Yishan and Li, Xinyu and Liu, Rui and Wang, Zhongyuan and Tian, Xin},
  title     = {{MS{\textasciicircum}2Gait}: A Multi-Scale Spatio-Temporal Fusion Network for {LiDAR}-based Gait Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17184--17193},
}
G-MIXER: Geodesic Mixup-based Implicit Semantic Expansion and Explicit Semantic Re-ranking for Zero-Shot Composed Image Retrieval: Jiyoung Lim,

Heejae Yang,

Jee-Hyong Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lim_2026_CVPR,
  author    = {Lim, Jiyoung and Yang, Heejae and Lee, Jee-Hyong},
  title     = {{G-MIXER}: Geodesic Mixup-based Implicit Semantic Expansion and Explicit Semantic Re-ranking for Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33891--33900},
}
Reconstructing CLIP for Open-Vocabulary Dense Perception: Yajie Liu,

Jinjin Zhang,

Qingjie Liu,

Di Huang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yajie and Zhang, Jinjin and Liu, Qingjie and Huang, Di},
  title     = {Reconstructing {CLIP} for Open-Vocabulary Dense Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39208--39218},
}
Cross-Axis Feature Fusion with Joint-Wise Motion Difference Prediction for Text-Based 3D Human Motion Editing: Gyojin Han,

Junmo Kim; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Gyojin and Kim, Junmo},
  title     = {Cross-Axis Feature Fusion with Joint-Wise Motion Difference Prediction for Text-Based {3D} Human Motion Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30618--30628},
}
GeCo: Geometry-Consistent Regularization for Domain Generalized Semantic Segmentation: Qi Zang,

Dong Zhao,

Nan Pu,

Wenjing Li,

Zhun Zhong,

Meng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zang_2026_CVPR,
  author    = {Zang, Qi and Zhao, Dong and Pu, Nan and Li, Wenjing and Zhong, Zhun and Wang, Meng},
  title     = {{GeCo}: Geometry-Consistent Regularization for Domain Generalized Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {871--881},
}
VIRD: View-Invariant Representation through Dual-Axis Transformation for Cross-View Pose Estimation: Juhye Park,

Wooju Lee,

Dasol Hong,

Changki Sung,

Youngwoo Seo,

Dongwan Kang,

Hyun Myung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Juhye and Lee, Wooju and Hong, Dasol and Sung, Changki and Seo, Youngwoo and Kang, Dongwan and Myung, Hyun},
  title     = {{VIRD}: View-Invariant Representation through Dual-Axis Transformation for Cross-View Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33693--33702},
}
Gen3R: 3D Scene Generation Meets Feed-Forward Reconstruction: Jiaxin Huang,

Yuanbo Yang,

Bangbang Yang,

Lin Ma,

Yuewen Ma,

Yiyi Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Jiaxin and Yang, Yuanbo and Yang, Bangbang and Ma, Lin and Ma, Yuewen and Liao, Yiyi},
  title     = {{Gen3R}: {3D} Scene Generation Meets Feed-Forward Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25358--25369},
}
REALM: An MLLM-Agent Framework for Open World 3D Reasoning Segmentation and Editing on Gaussian Splatting: Changyue Shi,

Minghao Chen,

Yiping Mao,

Chuxiao Yang,

Xinyuan Hu,

Jiajun Ding,

Zhou Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Changyue and Chen, Minghao and Mao, Yiping and Yang, Chuxiao and Hu, Xinyuan and Ding, Jiajun and Yu, Zhou},
  title     = {{REALM}: An {MLLM}-Agent Framework for Open World {3D} Reasoning Segmentation and Editing on {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16779--16788},
}
Unlocking Positive Transfer in Incrementally Learning Surgical Instruments: A Self-reflection Hierarchical Prompt Framework: Yu Zhu,

Kang Li,

Zheng Li,

Pheng-Ann Heng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yu and Li, Kang and Li, Zheng and Heng, Pheng-Ann},
  title     = {Unlocking Positive Transfer in Incrementally Learning Surgical Instruments: A Self-reflection Hierarchical Prompt Framework},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21006--21015},
}
AVGGT: Rethinking Global Attention for Accelerating VGGT: Xianbing Sun,

Zhikai Zhu,

Zhengyu Lou,

Bo Yang,

Jinyang Tang,

Liqing Zhang,

He Wang,

Jianfu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Xianbing and Zhu, Zhikai and Lou, Zhengyu and Yang, Bo and Tang, Jinyang and Zhang, Liqing and Wang, He and Zhang, Jianfu},
  title     = {{AVGGT}: Rethinking Global Attention for Accelerating {VGGT}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {251--260},
}
Restore, Assess, Repeat: A Unified Framework for Iterative Image Restoration: I-Hsiang Chen,

Isma Hadji,

Enrique Sanchez,

Adrian Bulat,

Sy-Yen Kuo,

Radu Timofte,

Georgios Tzimiropoulos,

Brais Martinez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, I-Hsiang and Hadji, Isma and Sanchez, Enrique and Bulat, Adrian and Kuo, Sy-Yen and Timofte, Radu and Tzimiropoulos, Georgios and Martinez, Brais},
  title     = {Restore, Assess, Repeat: A Unified Framework for Iterative Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15506--15515},
}
CDICS: Delving Into Fine-Grained Attribute for In-Context Segmentation via Compositional Prompts and Phased Decoupling: Zhiyu Li,

Dianmo Sheng,

Qi Chu,

Shilong Chen,

Tao Gong,

Zhou Wei,

Nenghai Yu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhiyu and Sheng, Dianmo and Chu, Qi and Chen, Shilong and Gong, Tao and Wei, Zhou and Yu, Nenghai},
  title     = {{CDICS}: Delving Into Fine-Grained Attribute for In-Context Segmentation via Compositional Prompts and Phased Decoupling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13179--13188},
}
TimeBridge: Self-Supervised Video Representation Learning via Start-End Joint Embedding and In-Between Frame Prediction: Qin Wang,

Abigail Morrison,

Hanno Scharr,

Kai Krajsek; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Qin and Morrison, Abigail and Scharr, Hanno and Krajsek, Kai},
  title     = {{TimeBridge}: Self-Supervised Video Representation Learning via Start-End Joint Embedding and In-Between Frame Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39647--39658},
}
Learning to Identify Out-of-Distribution Objects for 3D LiDAR Anomaly Segmentation: Simone Mosco,

Daniel Fusaro,

Alberto Pretto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mosco_2026_CVPR,
  author    = {Mosco, Simone and Fusaro, Daniel and Pretto, Alberto},
  title     = {Learning to Identify Out-of-Distribution Objects for {3D} {LiDAR} Anomaly Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17204--17214},
}
Object-Generalized Re-Identification: A Step Towards Universal Instance Perception: Shuoyi Chen,

Yurui Wu,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Shuoyi and Wu, Yurui and Ye, Mang},
  title     = {Object-Generalized Re-Identification: A Step Towards Universal Instance Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18481--18491},
}
Towards Real-World Document Parsing via Realistic Scene Synthesis and Document-Aware Training: Gengluo Li,

Pengyuan Lyu,

Chengquan Zhang,

Huawen Shen,

Liang Wu,

Xingyu Wan,

Gangyan Zeng,

Han Hu,

Can Ma,

Yu Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Gengluo and Lyu, Pengyuan and Zhang, Chengquan and Shen, Huawen and Wu, Liang and Wan, Xingyu and Zeng, Gangyan and Hu, Han and Ma, Can and Zhou, Yu},
  title     = {Towards Real-World Document Parsing via Realistic Scene Synthesis and Document-Aware Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23709--23719},
}
Thinking With Videos: Multimodal Tool-Augmented Reinforcement Learning for Long Video Reasoning: Haoji Zhang,

Xin Gu,

Jiawen Li,

Chixiang Ma,

Sule Bai,

Chubin Zhang,

Bowen Zhang,

Zhichao Zhou,

Dongliang He,

Yansong Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Haoji and Gu, Xin and Li, Jiawen and Ma, Chixiang and Bai, Sule and Zhang, Chubin and Zhang, Bowen and Zhou, Zhichao and He, Dongliang and Tang, Yansong},
  title     = {Thinking With Videos: Multimodal Tool-Augmented Reinforcement Learning for Long Video Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32903--32914},
}
High-Fidelity Mobile Avatars with Pruned Local Blendshapes: Youyi Zhan,

He Wang,

Tianjia Shao,

Kun Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2026_CVPR,
  author    = {Zhan, Youyi and Wang, He and Shao, Tianjia and Zhou, Kun},
  title     = {High-Fidelity Mobile Avatars with Pruned Local Blendshapes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32345--32356},
}
Wavelet-based Frame Selection by Detecting Semantic Boundary for Long Video Understanding: Wang Chen,

Yuhui Zeng,

Yongdong Luo,

Tianyu Xie,

Luojun Lin,

Jiayi Ji,

Yan Zhang,

Xiawu Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Wang and Zeng, Yuhui and Luo, Yongdong and Xie, Tianyu and Lin, Luojun and Ji, Jiayi and Zhang, Yan and Zheng, Xiawu},
  title     = {Wavelet-based Frame Selection by Detecting Semantic Boundary for Long Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24052--24061},
}
Bridging Pixels and Words: Mask-Aware Local Semantic Fusion for Multimodal Media Verification: Zizhao Chen,

Ping Wei,

Ziyang Ren,

Huan Li,

Xiangru Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zizhao and Wei, Ping and Ren, Ziyang and Li, Huan and Yin, Xiangru},
  title     = {Bridging Pixels and Words: Mask-Aware Local Semantic Fusion for Multimodal Media Verification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26561--26571},
}
Robust Remote Sensing Image-Text Retrieval with Noisy Correspondence: Qiya Song,

Yiqiang Xie,

Yuan Sun,

Renwei Dian,

Xudong Kang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Qiya and Xie, Yiqiang and Sun, Yuan and Dian, Renwei and Kang, Xudong},
  title     = {Robust Remote Sensing Image-Text Retrieval with Noisy Correspondence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9732--9741},
}
CodeV: Code with Images for Faithful Visual Reasoning via Tool-Aware Policy Optimization: Xinhai Hou,

Shaoyuan Xu,

Manan Biyani,

Moyan Li,

Jia Liu,

Todd C Hollon,

Bryan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hou_2026_CVPR,
  author    = {Hou, Xinhai and Xu, Shaoyuan and Biyani, Manan and Li, Moyan and Liu, Jia and Hollon, Todd C and Wang, Bryan},
  title     = {{CodeV}: Code with Images for Faithful Visual Reasoning via Tool-Aware Policy Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21500--21510},
}
Neural Differentiation in Deep Networks: A Theoretical Framework for Expressivity and Representational Diversity: Boyuan Wang,

Richard Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Boyuan and Jiang, Richard},
  title     = {Neural Differentiation in Deep Networks: A Theoretical Framework for Expressivity and Representational Diversity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41944--41953},
}
STAR-R1: Multi-View Spatial TrAnsformation Reasoning by Reinforcing Multimodal LLMs: Zongzhao Li,

Zongyang Ma,

Mingze Li,

Songyou Li,

Yu Rong,

Tingyang Xu,

Ziqi Zhang,

Deli Zhao,

Wenbing Huang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zongzhao and Ma, Zongyang and Li, Mingze and Li, Songyou and Rong, Yu and Xu, Tingyang and Zhang, Ziqi and Zhao, Deli and Huang, Wenbing},
  title     = {{STAR-R1}: Multi-View Spatial {TrAnsformation} Reasoning by Reinforcing Multimodal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12041--12051},
}
Mario: Multimodal Graph Reasoning with Large Language Models: Yuanfu Sun,

Kang Li,

Pengkang Guo,

Jiajin Liu,

Qiaoyu Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Yuanfu and Li, Kang and Guo, Pengkang and Liu, Jiajin and Tan, Qiaoyu},
  title     = {{Mario}: Multimodal Graph Reasoning with Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19219--19228},
}
ShapeAR: Generating Editable Shape Layers via Autoregressive Diffusion: Souymodip Chakraborty,

Ankur Singh,

Amit Vikram Singh,

Vineet Batra,

Ankit Phogat; [pdf]
[bibtex]
@InProceedings{Chakraborty_2026_CVPR,
  author    = {Chakraborty, Souymodip and Singh, Ankur and Singh, Amit Vikram and Batra, Vineet and Phogat, Ankit},
  title     = {{ShapeAR}: Generating Editable Shape Layers via Autoregressive Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40664--40673},
}
MRI Contrast Enhancement Kinetics World Model: Jindi Kong,

Yuting He,

Cong Xia,

Rongjun Ge,

Shuo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Jindi and He, Yuting and Xia, Cong and Ge, Rongjun and Li, Shuo},
  title     = {{MRI} Contrast Enhancement Kinetics World Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1288--1299},
}
L3DR: 3D-aware LiDAR Diffusion and Rectification: Quan Liu,

Xiaoqin Zhang,

Ling Shao,

Shijian Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Quan and Zhang, Xiaoqin and Shao, Ling and Lu, Shijian},
  title     = {{L3DR}: {3D}-aware {LiDAR} Diffusion and Rectification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17153--17163},
}
Semantic Derivative Flow: Graph-Guided Diffusion for Controllable Instance Interactions: Shibin Mei,

Hang Wang,

Bingbing Ni; [pdf] [supp]
[bibtex]
@InProceedings{Mei_2026_CVPR,
  author    = {Mei, Shibin and Wang, Hang and Ni, Bingbing},
  title     = {Semantic Derivative Flow: Graph-Guided Diffusion for Controllable Instance Interactions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14822--14831},
}
Towards Stable Federated Continual Test-Time Adaptation in Wild World: Liwen Wang,

Xingbo Dong,

Iman Yi Liao,

Zhe Jin; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Liwen and Dong, Xingbo and Liao, Iman Yi and Jin, Zhe},
  title     = {Towards Stable Federated Continual Test-Time Adaptation in Wild World},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29451--29461},
}
Beyond Ground-Truth: Leveraging Image Quality Priors for Real-World Image Restoration: Fengyang Xiao,

Peng Hu,

Lei Xu,

XingE Guo,

Guanyi Qin,

Yuqi Shen,

Chengyu Fang,

Rihan Zhang,

Chunming He,

Sina Farsiu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Fengyang and Hu, Peng and Xu, Lei and Guo, XingE and Qin, Guanyi and Shen, Yuqi and Fang, Chengyu and Zhang, Rihan and He, Chunming and Farsiu, Sina},
  title     = {Beyond Ground-Truth: Leveraging Image Quality Priors for Real-World Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29897--29908},
}
MindDriver: Introducing Progressive Multimodal Reasoning for Autonomous Driving: Lingjun Zhang,

Yujian Yuan,

Changjie Wu,

Xinyuan Chang,

Xin Cai,

Shuang Zeng,

Linzhe Shi,

Sijin Wang,

Hang Zhang,

Mu Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Lingjun and Yuan, Yujian and Wu, Changjie and Chang, Xinyuan and Cai, Xin and Zeng, Shuang and Shi, Linzhe and Wang, Sijin and Zhang, Hang and Xu, Mu},
  title     = {{MindDriver}: Introducing Progressive Multimodal Reasoning for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17831--17841},
}
Hybrid Token Compression for Vision-Language Models: Jusheng Zhang,

Xiaoyang Guo,

Kaitong Cai,

Qinhan Lv,

Yijia Fan,

Wenhao Chai,

Jian Wang,

Keze Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jusheng and Guo, Xiaoyang and Cai, Kaitong and Lv, Qinhan and Fan, Yijia and Chai, Wenhao and Wang, Jian and Wang, Keze},
  title     = {Hybrid Token Compression for Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31889--31899},
}
Trust-calibrated Collaborative Learning for Long-Tailed Visual Recognition: Hao Zhou,

Tingjin Luo; [pdf]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Hao and Luo, Tingjin},
  title     = {Trust-calibrated Collaborative Learning for Long-Tailed Visual Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40866--40875},
}
STCast: Adaptive Boundary Alignment for Global and Regional Weather Forecasting: Hao Chen,

Tao Han,

Jie Zhang,

Song Guo,

Lei Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Hao and Han, Tao and Zhang, Jie and Guo, Song and Bai, Lei},
  title     = {{STCast}: Adaptive Boundary Alignment for Global and Regional Weather Forecasting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20586--20596},
}
Parameter-Efficient Adaptation for MLLMs via Implicit Modality Decomposition: Mingfang Zhang,

Yunhong Wang,

Lu Wang,

Jiaxin Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Mingfang and Wang, Yunhong and Wang, Lu and Chen, Jiaxin},
  title     = {Parameter-Efficient Adaptation for {MLLMs} via Implicit Modality Decomposition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37745--37755},
}
Resolving Endpoint Underfitting in Diffusion Bridges via Noise Alignment: Yurong Gao,

Zicheng Zhang,

Congying Han,

Tiande Guo,

Xinmin Qiu; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yurong and Zhang, Zicheng and Han, Congying and Guo, Tiande and Qiu, Xinmin},
  title     = {Resolving Endpoint Underfitting in Diffusion Bridges via Noise Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27388--27397},
}
Rethinking Two-Stage Referring-by-Tracking in Referring Multi-Object Tracking: Make it Strong Again: Weize Li,

Yunhao Du,

Qixiang Yin,

Zhicheng Zhao,

Fei Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Weize and Du, Yunhao and Yin, Qixiang and Zhao, Zhicheng and Su, Fei},
  title     = {Rethinking Two-Stage Referring-by-Tracking in Referring Multi-Object Tracking: Make it Strong Again},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42549--42559},
}
FaithFusion: Harmonizing Reconstruction and Generation via Pixel-wise Information Gain: YuAn Wang,

Xiaofan Li,

Chi Huang,

Wenhao Zhang,

Hao Li,

Bosheng Wang,

Xun Sun,

Jun Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, YuAn and Li, Xiaofan and Huang, Chi and Zhang, Wenhao and Li, Hao and Wang, Bosheng and Sun, Xun and Wang, Jun},
  title     = {{FaithFusion}: Harmonizing Reconstruction and Generation via Pixel-wise Information Gain},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1198--1209},
}
Pointer-CAD: Unifying B-Rep and Command Sequences via Pointer-based Edges & Faces Selection: Dacheng Qi,

Chenyu Wang,

Jingwei Xu,

Tianzhe Chu,

Zibo Zhao,

Wen Liu,

Wenrui Ding,

Yi Ma,

Shenghua Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Dacheng and Wang, Chenyu and Xu, Jingwei and Chu, Tianzhe and Zhao, Zibo and Liu, Wen and Ding, Wenrui and Ma, Yi and Gao, Shenghua},
  title     = {{Pointer-CAD}: Unifying {B-Rep} and Command Sequences via Pointer-based Edges \& Faces Selection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17377--17387},
}
VarSplat: Uncertainty-aware 3D Gaussian Splatting for Robust RGB-D SLAM: Anh Thuan Tran,

Jana Kosecka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2026_CVPR,
  author    = {Tran, Anh Thuan and Kosecka, Jana},
  title     = {{VarSplat}: Uncertainty-aware {3D} {Gaussian} Splatting for Robust {RGB-D} {SLAM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26072--26082},
}
SafeDrive: Fine-Grained Safety Reasoning for End-to-End Driving in a Sparse World: Jungho Kim,

Jiyong Oh,

Seunghoon Yu,

Hongjae Shin,

Donghyuk Kwak,

Jun Won Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jungho and Oh, Jiyong and Yu, Seunghoon and Shin, Hongjae and Kwak, Donghyuk and Choi, Jun Won},
  title     = {{SafeDrive}: Fine-Grained Safety Reasoning for End-to-End Driving in a Sparse World},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24854--24864},
}
StreamDiT: Real-Time Streaming Text-to-Video Generation: Akio Kodaira,

Tingbo Hou,

Ji Hou,

Markos Georgopoulos,

Felix Juefei-Xu,

Masayoshi Tomizuka,

Yue Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kodaira_2026_CVPR,
  author    = {Kodaira, Akio and Hou, Tingbo and Hou, Ji and Georgopoulos, Markos and Juefei-Xu, Felix and Tomizuka, Masayoshi and Zhao, Yue},
  title     = {{StreamDiT}: Real-Time Streaming Text-to-Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29200--29210},
}
IR-HGP: Physically-Aware Gaussian Inverse Rendering for High-Illumination Scenes via Generative Priors: Qingan Zhang,

Wensheng Li,

Chengying Gao; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Qingan and Li, Wensheng and Gao, Chengying},
  title     = {{IR-HGP}: Physically-Aware {Gaussian} Inverse Rendering for High-Illumination Scenes via Generative Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1210--1220},
}
GuideFlow: Constraint-Guided Flow Matching for Planning in End-to-End Autonomous Driving: Lin Liu,

Caiyan Jia,

Guanyi Yu,

Ziying Song,

Junqiao Li,

Feiyang Jia,

Peiliang Wu,

Xiaoshuai Hao,

Yadan Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Lin and Jia, Caiyan and Yu, Guanyi and Song, Ziying and Li, Junqiao and Jia, Feiyang and Wu, Peiliang and Hao, Xiaoshuai and Luo, Yadan},
  title     = {{GuideFlow}: Constraint-Guided Flow Matching for Planning in End-to-End Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3719--3728},
}
CryoKRAQEN: Kernel-Regularized Annealing for Quantized Embedding Networks in Cryo-EM Heterogeneous Reconstruction: Wenyuan Gao,

Yutan Wu,

Xuming He; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Wenyuan and Wu, Yutan and He, Xuming},
  title     = {{CryoKRAQEN}: Kernel-Regularized Annealing for Quantized Embedding Networks in {Cryo-EM} Heterogeneous Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28298--28307},
}
mVLM: A Vision Language Model for mNPUs: Zijie Chen,

Guiyun Fan,

Zhaoxing Yang,

Rong Ding,

Haiming Jin; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zijie and Fan, Guiyun and Yang, Zhaoxing and Ding, Rong and Jin, Haiming},
  title     = {{mVLM}: A Vision Language Model for {mNPUs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18892--18902},
}
The Image as Its Own Reward: Reinforcement Learning with Adversarial Reward for Image Generation: Weijia Mao,

Hao Chen,

Zhenheng Yang,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Weijia and Chen, Hao and Yang, Zhenheng and Shou, Mike Zheng},
  title     = {The Image as Its Own Reward: Reinforcement Learning with Adversarial Reward for Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5999--6009},
}
Decompose and Transfer: CoT-Prompting Enhanced Alignment for Open-Vocabulary Temporal Action Detection: Sa Zhu,

Wanqian Zhang,

Lin Wang,

Xiaohua Chen,

Chenxu Cui,

Jinchao Zhang,

Bo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Sa and Zhang, Wanqian and Wang, Lin and Chen, Xiaohua and Cui, Chenxu and Zhang, Jinchao and Li, Bo},
  title     = {Decompose and Transfer: {CoT}-Prompting Enhanced Alignment for Open-Vocabulary Temporal Action Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20334--20344},
}
Event Stream Filtering via Probability Flux Estimation: Jinze Chen,

Wei Zhai,

Yang Cao,

Bin Li,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jinze and Zhai, Wei and Cao, Yang and Li, Bin and Zha, Zheng-Jun},
  title     = {Event Stream Filtering via Probability Flux Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8023--8032},
}
Roots Beneath the Cut: Uncovering the Risk of Concept Revival in Pruning-Based Unlearning for Diffusion Models: Ci Zhang,

Zhaojun Ding,

Chence Yang,

Jun Liu,

Xiaoming Zhai,

Shaoyi Huang,

Beiwen Li,

Xiaolong Ma,

Jin Lu,

Geng Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Ci and Ding, Zhaojun and Yang, Chence and Liu, Jun and Zhai, Xiaoming and Huang, Shaoyi and Li, Beiwen and Ma, Xiaolong and Lu, Jin and Yuan, Geng},
  title     = {Roots Beneath the Cut: Uncovering the Risk of Concept Revival in Pruning-Based Unlearning for Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35872--35881},
}
Layer-wise Instance Binding for Regional and Occlusion Control in Text-to-Image Diffusion Transformers: Ruidong Chen,

Yancheng Bai,

Xuanpu Zhang,

Jianhao Zeng,

Lanjun Wang,

Dan Song,

Lei Sun,

Xiangxiang Chu,

Anan Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Ruidong and Bai, Yancheng and Zhang, Xuanpu and Zeng, Jianhao and Wang, Lanjun and Song, Dan and Sun, Lei and Chu, Xiangxiang and Liu, Anan},
  title     = {Layer-wise Instance Binding for Regional and Occlusion Control in Text-to-Image Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11493--11503},
}
Efficient Unrolled Networks for Large-Scale 3D Inverse Problems: Romain Vo,

Julián Tachella; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vo_2026_CVPR,
  author    = {Vo, Romain and Tachella, Juli{\'a}n},
  title     = {Efficient Unrolled Networks for Large-Scale {3D} Inverse Problems},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36343--36353},
}
SliderEdit: Continuous Image Editing with Fine-Grained Instruction Control: Arman Zarei,

Samyadeep Basu,

Mobina Pournemat,

Sayan Nag,

Ryan A. Rossi,

Soheil Feizi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zarei_2026_CVPR,
  author    = {Zarei, Arman and Basu, Samyadeep and Pournemat, Mobina and Nag, Sayan and Rossi, Ryan A. and Feizi, Soheil},
  title     = {{SliderEdit}: Continuous Image Editing with Fine-Grained Instruction Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14430--14439},
}
Open the Motion Door: Atomic Motion Decomposition and Recomposition for Open-Vocabulary Motion Generation: Ke Fan,

Jiangning Zhang,

Ran Yi,

Jingyu Gong,

Yabiao Wang,

Yating Wang,

Xin Tan,

Chengjie Wang,

Lizhuang Ma; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Ke and Zhang, Jiangning and Yi, Ran and Gong, Jingyu and Wang, Yabiao and Wang, Yating and Tan, Xin and Wang, Chengjie and Ma, Lizhuang},
  title     = {Open the Motion Door: Atomic Motion Decomposition and Recomposition for Open-Vocabulary Motion Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9330--9341},
}
Reinforcement-Guided Synthetic Data Generation for Privacy-Sensitive Identity Recognition: Xuemei Jia,

Jiawei Du,

Hui Wei,

Jun Chen,

Joey Tianyi Zhou,

Zheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Xuemei and Du, Jiawei and Wei, Hui and Chen, Jun and Zhou, Joey Tianyi and Wang, Zheng},
  title     = {Reinforcement-Guided Synthetic Data Generation for Privacy-Sensitive Identity Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20034--20044},
}
Faithful Contouring: Near-Lossless 3D Voxel Representation Free from Iso-surface: Yihao Luo,

Xianglong He,

Chuanyu Pan,

Yiwen Chen,

Jiaqi Wu,

Yangguang Li,

Wanli Ouyang,

Yuanming Hu,

Guang Yang,

ChoonHwai Yap; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Yihao and He, Xianglong and Pan, Chuanyu and Chen, Yiwen and Wu, Jiaqi and Li, Yangguang and Ouyang, Wanli and Hu, Yuanming and Yang, Guang and Yap, ChoonHwai},
  title     = {Faithful Contouring: Near-Lossless {3D} Voxel Representation Free from Iso-surface},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14408--14418},
}
Image Generation from Contextually-Contradictory Prompts: Saar Huberman,

Or Patashnik,

Omer Dahary,

Ron Mokady,

Daniel Cohen-Or; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huberman_2026_CVPR,
  author    = {Huberman, Saar and Patashnik, Or and Dahary, Omer and Mokady, Ron and Cohen-Or, Daniel},
  title     = {Image Generation from Contextually-Contradictory Prompts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14885--14894},
}
See Less, See Right: Bi-directional Perceptual Shaping For Multimodal Reasoning: Shuoshuo Zhang,

Yizhen Zhang,

Jingjing Fu,

Lei Song,

Jiang Bian,

Yujiu Yang,

Rui Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Shuoshuo and Zhang, Yizhen and Fu, Jingjing and Song, Lei and Bian, Jiang and Yang, Yujiu and Wang, Rui},
  title     = {See Less, See Right: Bi-directional Perceptual Shaping For Multimodal Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33499--33509},
}
Mesh-Pro: Asynchronous Advantage-guided Ranking Preference Optimization for Artist-style Quadrilateral Mesh Generation: Zhen Zhou,

Jian Liu,

Biwen Lei,

Jing Xu,

Haohan Weng,

Yiling Zhu,

Zhuo Chen,

Junfeng Fan,

Yunkai Ma,

Dazhao Du,

Song Guo,

Fengshui Jing,

Chunchao Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Zhen and Liu, Jian and Lei, Biwen and Xu, Jing and Weng, Haohan and Zhu, Yiling and Chen, Zhuo and Fan, Junfeng and Ma, Yunkai and Du, Dazhao and Guo, Song and Jing, Fengshui and Guo, Chunchao},
  title     = {{Mesh-Pro}: Asynchronous Advantage-guided Ranking Preference Optimization for Artist-style Quadrilateral Mesh Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34248--34258},
}
Causality in Video Diffusers is Separable from Denoising: Xingjian Bai,

Guande He,

Zhengqi Li,

Eli Shechtman,

Xun Huang,

Zongze Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Xingjian and He, Guande and Li, Zhengqi and Shechtman, Eli and Huang, Xun and Wu, Zongze},
  title     = {Causality in Video Diffusers is Separable from Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43373--43384},
}
X-PCR: A Benchmark for Cross-modality Progressive Clinical Reasoning in Ophthalmic Diagnosis: Gui Wang,

Zehao Zhong,

YongSong Zhou,

Yudong Li,

Ende Wu,

Wooi Ping Cheah,

Rong Qu,

Jianfeng Ren,

Linlin Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Gui and Zhong, Zehao and Zhou, YongSong and Li, Yudong and Wu, Ende and Cheah, Wooi Ping and Qu, Rong and Ren, Jianfeng and Shen, Linlin},
  title     = {{X-PCR}: A Benchmark for Cross-modality Progressive Clinical Reasoning in Ophthalmic Diagnosis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33110--33120},
}
An Empirical Study on How Video-LLMs Answer Video Questions: Chenhui Gou,

Ziyu Ma,

Zicheng Duan,

Haoyu He,

Feng Chen,

Akide Liu,

Bohan Zhuang,

Jianfei Cai,

Hamid Rezatofighi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gou_2026_CVPR,
  author    = {Gou, Chenhui and Ma, Ziyu and Duan, Zicheng and He, Haoyu and Chen, Feng and Liu, Akide and Zhuang, Bohan and Cai, Jianfei and Rezatofighi, Hamid},
  title     = {An Empirical Study on How {Video-LLMs} Answer Video Questions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18587--18597},
}
Pantheon360: Taming Digital Twin Generation via 3D-Aware 360° Video Diffusion: Ting-Hsuan Chen,

Ying-Huan Chen,

Tao Tu,

Jie-Ying Lee,

Cho-Ying Wu,

Fangzhou Lin,

Hengyuan Zhang,

David Paz,

Xinyu Huang,

Yuliang Guo,

Yu-Lun Liu,

Yue Wang,

Liu Ren; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Ting-Hsuan and Chen, Ying-Huan and Tu, Tao and Lee, Jie-Ying and Wu, Cho-Ying and Lin, Fangzhou and Zhang, Hengyuan and Paz, David and Huang, Xinyu and Guo, Yuliang and Liu, Yu-Lun and Wang, Yue and Ren, Liu},
  title     = {{Pantheon360}: Taming Digital Twin Generation via {3D}-Aware 360{$^\circ$} Video Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11138--11149},
}
Task-Oriented Data Synthesis and Control-Rectify Sampling for Remote Sensing Semantic Segmentation: Yunkai Yang,

Yudong Zhang,

Kunquan Zhang,

Jinxiao Zhang,

Xinying Chen,

Haohuan Fu,

Runmin Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yunkai and Zhang, Yudong and Zhang, Kunquan and Zhang, Jinxiao and Chen, Xinying and Fu, Haohuan and Dong, Runmin},
  title     = {Task-Oriented Data Synthesis and Control-Rectify Sampling for Remote Sensing Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42147--42157},
}
Robust Spiking Neural Networks by Temporal Mutual Information: Mengting Xu,

Shi Gu,

Peng Lin,

De Ma,

Huajin Tang,

Qian Zheng,

Gang Pan; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Mengting and Gu, Shi and Lin, Peng and Ma, De and Tang, Huajin and Zheng, Qian and Pan, Gang},
  title     = {Robust Spiking Neural Networks by Temporal Mutual Information},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20711--20720},
}
Recurrent Reasoning with Vision-Language Models for Estimating Long-Horizon Embodied Task Progress: Yuelin Zhang,

Sijie Cheng,

Chen Li,

Zongzhao Li,

Yuxin Huang,

Yang Liu,

Wenbing Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yuelin and Cheng, Sijie and Li, Chen and Li, Zongzhao and Huang, Yuxin and Liu, Yang and Huang, Wenbing},
  title     = {Recurrent Reasoning with Vision-Language Models for Estimating Long-Horizon Embodied Task Progress},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41150--41159},
}
SRA 2: Variational Autoencoder Self-Representation Alignment for Efficient Diffusion Training: Mengmeng Wang,

Dengyang Jiang,

Liuzhuozheng Li,

Yucheng Lin,

Guojiang Shen,

Xiangjie Kong,

Yong Liu,

Guang Dai,

Jingdong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Mengmeng and Jiang, Dengyang and Li, Liuzhuozheng and Lin, Yucheng and Shen, Guojiang and Kong, Xiangjie and Liu, Yong and Dai, Guang and Wang, Jingdong},
  title     = {{SRA 2}: Variational Autoencoder Self-Representation Alignment for Efficient Diffusion Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32978--32987},
}
InvCoSS: Inversion-driven Continual Self-supervised Learning in Medical Multi-modal Image Pre-training: Zihao Luo,

Shaohao Rui,

Zhenyu Tang,

Guotai Wang,

Xiaosong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Zihao and Rui, Shaohao and Tang, Zhenyu and Wang, Guotai and Wang, Xiaosong},
  title     = {{InvCoSS}: Inversion-driven Continual Self-supervised Learning in Medical Multi-modal Image Pre-training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42626--42636},
}
Correspondence-Attention Alignment for Multi-View Diffusion Models: Minkyung Kwon,

Jinhyeok Choi,

Jiho Park,

Seonghu Jeon,

Jinhyuk Jang,

Junyoung Seo,

Minseop Kwak,

Jin-Hwa Kim,

Seungryong Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kwon_2026_CVPR,
  author    = {Kwon, Minkyung and Choi, Jinhyeok and Park, Jiho and Jeon, Seonghu and Jang, Jinhyuk and Seo, Junyoung and Kwak, Minseop and Kim, Jin-Hwa and Kim, Seungryong},
  title     = {Correspondence-Attention Alignment for Multi-View Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2316--2326},
}
GaussFusion: Improving 3D Reconstruction in the Wild with A Geometry-Informed Video Generator: Liyuan Zhu,

Manjunath Narayana,

Michal Stary,

Will Hutchcroft,

Gordon Wetzstein,

Iro Armeni; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Liyuan and Narayana, Manjunath and Stary, Michal and Hutchcroft, Will and Wetzstein, Gordon and Armeni, Iro},
  title     = {{GaussFusion}: Improving {3D} Reconstruction in the Wild with A Geometry-Informed Video Generator},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15432--15442},
}
CIGMA: Causal Information-Gain Mechanistic Attribution of Attention Heads in Vision Transformers: Maisha Maliha,

Dean F. Hougen; [pdf] [supp]
[bibtex]
@InProceedings{Maliha_2026_CVPR,
  author    = {Maliha, Maisha and Hougen, Dean F.},
  title     = {{CIGMA}: Causal Information-Gain Mechanistic Attribution of Attention Heads in Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9891--9900},
}
ShelfOcc: Native 3D Supervision beyond LiDAR for Vision-Based Occupancy Estimation: Simon Boeder,

Fabian Gigengack,

Simon Roesler,

Holger Caesar,

Benjamin Risse; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Boeder_2026_CVPR,
  author    = {Boeder, Simon and Gigengack, Fabian and Roesler, Simon and Caesar, Holger and Risse, Benjamin},
  title     = {{ShelfOcc}: Native {3D} Supervision beyond {LiDAR} for Vision-Based Occupancy Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28620--28631},
}
Beyond Single Images: A Comprehensive Benchmark for Album-Level Vision-Language Understanding: Shawn Huang,

Brian Price,

Yifei Fan,

Bryan Morse; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Shawn and Price, Brian and Fan, Yifei and Morse, Bryan},
  title     = {Beyond Single Images: A Comprehensive Benchmark for Album-Level Vision-Language Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38564--38573},
}
AToken: A Unified Tokenizer for Vision: Jiasen Lu,

Liangchen Song,

Mingze Xu,

Byeongjoo Ahn,

Yanjun Wang,

Chen Chen,

Afshin Dehghan,

Yinfei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Jiasen and Song, Liangchen and Xu, Mingze and Ahn, Byeongjoo and Wang, Yanjun and Chen, Chen and Dehghan, Afshin and Yang, Yinfei},
  title     = {{AToken}: A Unified Tokenizer for Vision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28701--28711},
}
Active Perceptual Inference: A Corticothalamic-Inspired Dynamic Nested Recurrent Network for Multimodal Sentiment Analysis with Incomplete Data: Yujuan Zhang,

Qing Li,

Ziyu Li,

Xiuxing Li,

Zhuo Wang,

Mengrui Xu,

Xia Wu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yujuan and Li, Qing and Li, Ziyu and Li, Xiuxing and Wang, Zhuo and Xu, Mengrui and Wu, Xia},
  title     = {Active Perceptual Inference: A Corticothalamic-Inspired Dynamic Nested Recurrent Network for Multimodal Sentiment Analysis with Incomplete Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1787--1797},
}
Urban-GS: A Unified 3D Gaussian Splatting Framework for Compact and High-Fidelity Aerial-to-Street Reconstruction: Meng Wang,

Changqun Xia,

Yuze Wang,

Junyi Wang,

Wantong Duan,

Xinxiong Xie,

Yue Qi; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Meng and Xia, Changqun and Wang, Yuze and Wang, Junyi and Duan, Wantong and Xie, Xinxiong and Qi, Yue},
  title     = {{Urban-GS}: A Unified {3D} {Gaussian} Splatting Framework for Compact and High-Fidelity Aerial-to-Street Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33207--33216},
}
Towards Generalized Representations for Low-Light Understanding: When Signal Constancy Meets Semantic Enrichment: Yifan Li,

Haofeng Huang,

Wenhan Yang,

Jiaying Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yifan and Huang, Haofeng and Yang, Wenhan and Liu, Jiaying},
  title     = {Towards Generalized Representations for Low-Light Understanding: When Signal Constancy Meets Semantic Enrichment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1386--1395},
}
Bridging RGB and Hematoxylin Components: An Interleaved Guidance and Fusion Framework for Point Supervised Nuclei Segmentation: Zihan Huan,

Xipeng Pan,

Hualong Zhang,

Siyang Feng,

Rushi Lan,

Huadeng Wang,

Haoxiang Lu,

Zhenbing Liu; [pdf] [supp]
[bibtex]
@InProceedings{Huan_2026_CVPR,
  author    = {Huan, Zihan and Pan, Xipeng and Zhang, Hualong and Feng, Siyang and Lan, Rushi and Wang, Huadeng and Lu, Haoxiang and Liu, Zhenbing},
  title     = {Bridging {RGB} and Hematoxylin Components: An Interleaved Guidance and Fusion Framework for Point Supervised Nuclei Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37518--37527},
}
Tri-Subspaces Disentanglement for Multimodal Sentiment Analysis: Chunlei Meng,

Jiabin Luo,

Zhenglin Yan,

Zhenyu Yu,

Rong Fu,

Zhongxue Gan,

Chun Ouyang; [pdf] [arXiv]
[bibtex]
@InProceedings{Meng_2026_CVPR,
  author    = {Meng, Chunlei and Luo, Jiabin and Yan, Zhenglin and Yu, Zhenyu and Fu, Rong and Gan, Zhongxue and Ouyang, Chun},
  title     = {Tri-Subspaces Disentanglement for Multimodal Sentiment Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8791--8800},
}
Event-based Visual Deformation Measurement: Yuliang Wu,

Wei Zhai,

Yuxin Cui,

Tiesong Zhao,

Yang Cao,

Zheng-Jun Zha; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yuliang and Zhai, Wei and Cui, Yuxin and Zhao, Tiesong and Cao, Yang and Zha, Zheng-Jun},
  title     = {Event-based Visual Deformation Measurement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {903--913},
}
Masking Matters: Unlocking the Spatial Reasoning Capabilities of LLMs for 3D Scene-Language Understanding: Yerim Jeon,

Miso Lee,

WonJun Moon,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeon_2026_CVPR,
  author    = {Jeon, Yerim and Lee, Miso and Moon, WonJun and Heo, Jae-Pil},
  title     = {Masking Matters: Unlocking the Spatial Reasoning Capabilities of {LLMs} for {3D} Scene-Language Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38668--38677},
}
GeoDexGrasp: Geometry-aware Generation for Data-efficient and Physics-plausible Dexterous Grasping: Bing Han,

Weiyuan Liu,

Changlong Zhang,

Chenxi Wang,

Zhibin Zhao,

Zhi Zhai; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Bing and Liu, Weiyuan and Zhang, Changlong and Wang, Chenxi and Zhao, Zhibin and Zhai, Zhi},
  title     = {{GeoDexGrasp}: Geometry-aware Generation for Data-efficient and Physics-plausible Dexterous Grasping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6729--6739},
}
PriVi: Towards a General-Purpose Video Model for Primate Behavior in the Wild: Felix B. Mueller,

Jan F. Meier,

Timo Lueddecke,

Richard Vogg,

Roger L. Freixanet,

Valentin Hassler,

Tiffany Bosshard,

Elif Karakoc,

William J. O'Hearn,

Sofia M. Pereira,

Sandro Sehner,

Kaja Wierucka,

Judith Burkart,

Claudia Fichtel,

Julia Fischer,

Alexander Gail,

Catherine Hobaiter,

Julia Ostner,

Liran Samuni,

Oliver Schülke,

Neda Shahidi,

Erin G. Wessling,

Alexander S. Ecker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mueller_2026_CVPR,
  author    = {Mueller, Felix B. and Meier, Jan F. and Lueddecke, Timo and Vogg, Richard and Freixanet, Roger L. and Hassler, Valentin and Bosshard, Tiffany and Karakoc, Elif and O'Hearn, William J. and Pereira, Sofia M. and Sehner, Sandro and Wierucka, Kaja and Burkart, Judith and Fichtel, Claudia and Fischer, Julia and Gail, Alexander and Hobaiter, Catherine and Ostner, Julia and Samuni, Liran and Sch{\"u}lke, Oliver and Shahidi, Neda and Wessling, Erin G. and Ecker, Alexander S.},
  title     = {{PriVi}: Towards a General-Purpose Video Model for Primate Behavior in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38804--38815},
}
Beyond Soft Label: Dataset Distillation via Orthogonal Gradient Matching: Deyu Bo,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Bo_2026_CVPR,
  author    = {Bo, Deyu and Wang, Xinchao},
  title     = {Beyond Soft Label: Dataset Distillation via Orthogonal Gradient Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5596--5605},
}
CUBic: Coordinated Unified Bimanual Perception and Control Framework: Xingyu Wang,

Pengxiang Ding,

Jingkai Xu,

Donglin Wang,

Zhaoxin Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xingyu and Ding, Pengxiang and Xu, Jingkai and Wang, Donglin and Fan, Zhaoxin},
  title     = {{CUBic}: Coordinated Unified Bimanual Perception and Control Framework},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20856--20866},
}
CHIPS: Efficient CLIP Adaptation via Curvature-aware Hybrid Influence-based Data Selection: Xinlin Zhuang,

Yichen Li,

Xiwei Liu,

Haolin Yang,

Yifan Lu,

Ziyun Zou,

Yulong Li,

Huifa Li,

Dongliang Chen,

Qinglei Wang,

Weiyang Liu,

Ying Qian,

Jiangming Shi,

Imran Razzak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuang_2026_CVPR,
  author    = {Zhuang, Xinlin and Li, Yichen and Liu, Xiwei and Yang, Haolin and Lu, Yifan and Zou, Ziyun and Li, Yulong and Li, Huifa and Chen, Dongliang and Wang, Qinglei and Liu, Weiyang and Qian, Ying and Shi, Jiangming and Razzak, Imran},
  title     = {{CHIPS}: Efficient {CLIP} Adaptation via Curvature-aware Hybrid Influence-based Data Selection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29483--29493},
}
Fast-ThinkAct: Efficient Vision-Language-Action Reasoning via Verbalizable Latent Planning: Chi-Pin Huang,

Yunze Man,

Zhiding Yu,

Min-Hung Chen,

Jan Kautz,

Yu-Chiang Frank Wang,

Fu-En Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Chi-Pin and Man, Yunze and Yu, Zhiding and Chen, Min-Hung and Kautz, Jan and Wang, Yu-Chiang Frank and Yang, Fu-En},
  title     = {{Fast-ThinkAct}: Efficient Vision-Language-Action Reasoning via Verbalizable Latent Planning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5070--5081},
}
Unposed-to-3D: Learning Simulation-Ready Vehicles from Real-World Images: Hongyuan Liu,

Bochao Zou,

Qiankun Liu,

Haochen Yu,

Qi Mei,

Jianfei Jiang,

Chen Liu,

Cheng Bi,

Zhao Wang,

Xueyang Zhang,

Yifei Zhan,

Jiansheng Chen,

Huimin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Hongyuan and Zou, Bochao and Liu, Qiankun and Yu, Haochen and Mei, Qi and Jiang, Jianfei and Liu, Chen and Bi, Cheng and Wang, Zhao and Zhang, Xueyang and Zhan, Yifei and Chen, Jiansheng and Ma, Huimin},
  title     = {{Unposed-to-3D}: Learning Simulation-Ready Vehicles from Real-World Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24843--24853},
}
Geometry-Guided 3D Visual Token Pruning for Video-Language Models: Han Li,

Zehao Huang,

Jiahui Fu,

Naiyan Wang,

Si Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Han and Huang, Zehao and Fu, Jiahui and Wang, Naiyan and Liu, Si},
  title     = {Geometry-Guided {3D} Visual Token Pruning for Video-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9615--9625},
}
DiP: Taming Diffusion Models in Pixel Space: Zhennan Chen,

Junwei Zhu,

Xu Chen,

Jiangning Zhang,

Xiaobin Hu,

Hanzhen Zhao,

Chengjie Wang,

Jian Yang,

Ying Tai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhennan and Zhu, Junwei and Chen, Xu and Zhang, Jiangning and Hu, Xiaobin and Zhao, Hanzhen and Wang, Chengjie and Yang, Jian and Tai, Ying},
  title     = {{DiP}: Taming Diffusion Models in Pixel Space},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36136--36146},
}
ReasonX: MLLM-Guided Intrinsic Image Decomposition: Alara Dirik,

Tuanfeng Yang Wang,

Duygu Ceylan,

Stefanos Zafeiriou,

Anna Frühstück; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dirik_2026_CVPR,
  author    = {Dirik, Alara and Wang, Tuanfeng Yang and Ceylan, Duygu and Zafeiriou, Stefanos and Fr{\"u}hst{\"u}ck, Anna},
  title     = {{ReasonX}: {MLLM}-Guided Intrinsic Image Decomposition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30802--30812},
}
DyFCLT: Dynamic Frequency-Decoupled Cross-Modal Learning Transformer for Multimodal Tiny Object Detection: Chaolang Li,

Pengwen Dai,

Jingyu Li,

Siyuan Yao,

Yuchen Jiang,

Zhuoran Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Chaolang and Dai, Pengwen and Li, Jingyu and Yao, Siyuan and Jiang, Yuchen and Zheng, Zhuoran},
  title     = {{DyFCLT}: Dynamic Frequency-Decoupled Cross-Modal Learning Transformer for Multimodal Tiny Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11313--11323},
}
ReLaX: Reasoning with Latent Exploration for Large Reasoning Models: Shimin Zhang,

Xianwei Chen,

Yufan Shen,

Ziyuan Ye,

Jibin Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Shimin and Chen, Xianwei and Shen, Yufan and Ye, Ziyuan and Wu, Jibin},
  title     = {{ReLaX}: Reasoning with Latent Exploration for Large Reasoning Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33761--33771},
}
SketchAssist: A Practical Assistant for Semantic Edits and Precise Local Redrawing: Han Zou,

Yan Zhang,

Ruiqi Yu,

Cong Xie,

Jie Huang,

Zhenpeng Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Han and Zhang, Yan and Yu, Ruiqi and Xie, Cong and Huang, Jie and Zhan, Zhenpeng},
  title     = {{SketchAssist}: A Practical Assistant for Semantic Edits and Precise Local Redrawing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16258--16267},
}
Residual Primitive Fitting of 3D Shapes with SuperFrusta: Aditya Ganeshan,

Matheus Gadelha,

Thibault Groueix,

Zhiqin Chen,

Siddhartha Chaudhuri,

Vladimir Kim,

Wang Yifan,

Daniel Ritchie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ganeshan_2026_CVPR,
  author    = {Ganeshan, Aditya and Gadelha, Matheus and Groueix, Thibault and Chen, Zhiqin and Chaudhuri, Siddhartha and Kim, Vladimir and Yifan, Wang and Ritchie, Daniel},
  title     = {Residual Primitive Fitting of {3D} Shapes with {SuperFrusta}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7404--7413},
}
Continuous Exposure-Time Modeling for Realistic Atmospheric Turbulence Synthesis: Junwei Zeng,

Dong Liang,

Sheng-Jun Huang,

Kun Zhan,

Songcan Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Junwei and Liang, Dong and Huang, Sheng-Jun and Zhan, Kun and Chen, Songcan},
  title     = {Continuous Exposure-Time Modeling for Realistic Atmospheric Turbulence Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26678--26687},
}
DiffBMP: Differentiable Rendering with Bitmap Primitives: Seongmin Hong,

Junghun James Kim,

Daehyeop Kim,

Insoo Chung,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Seongmin and Kim, Junghun James and Kim, Daehyeop and Chung, Insoo and Chun, Se Young},
  title     = {{DiffBMP}: Differentiable Rendering with Bitmap Primitives},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26741--26750},
}
BrickNet: Graph-Backed Generative Brick Assembly: Peter Kulits,

Cordelia Schmid; [pdf] [arXiv]
[bibtex]
@InProceedings{Kulits_2026_CVPR,
  author    = {Kulits, Peter and Schmid, Cordelia},
  title     = {{BrickNet}: Graph-Backed Generative Brick Assembly},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39252--39261},
}
Diffusion Mental Averages: Phonphrm Thawatdamrongkit,

Sukit Seripanitkarn,

Supasorn Suwajanakorn; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thawatdamrongkit_2026_CVPR,
  author    = {Thawatdamrongkit, Phonphrm and Seripanitkarn, Sukit and Suwajanakorn, Supasorn},
  title     = {Diffusion Mental Averages},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35713--35725},
}
SFR-Net: Steering-Fusion-Refining Network in Multi-label Zero-Shot Sewer Defect Detection: Zhao-Min Chen,

Xinjian Huang,

Yisu Ge,

Yu Li; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhao-Min and Huang, Xinjian and Ge, Yisu and Li, Yu},
  title     = {{SFR-Net}: Steering-Fusion-Refining Network in Multi-label Zero-Shot Sewer Defect Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41636--41645},
}
SAM 3D: 3Dfy Anything in Images: Xingyu Chen,

Fu-Jen Chu,

Pierre Gleize,

Kevin J Liang,

Alexander Sax,

Hao Tang,

Weiyao Wang,

Michelle Guo,

Thibaut Hardin,

Xiang Li,

Aohan Lin,

Jia-Wei Liu,

Ziqi Ma,

Anushka Sagar,

Bowen Song,

Xiaodong Wang,

Jianing Yang,

Bowen Zhang,

Piotr Dollár,

Georgia Gkioxari,

Matt Feiszli,

Jitendra Malik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xingyu and Chu, Fu-Jen and Gleize, Pierre and Liang, Kevin J and Sax, Alexander and Tang, Hao and Wang, Weiyao and Guo, Michelle and Hardin, Thibaut and Li, Xiang and Lin, Aohan and Liu, Jia-Wei and Ma, Ziqi and Sagar, Anushka and Song, Bowen and Wang, Xiaodong and Yang, Jianing and Zhang, Bowen and Doll{\'a}r, Piotr and Gkioxari, Georgia and Feiszli, Matt and Malik, Jitendra},
  title     = {{SAM} {3D}: {3Dfy} Anything in Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7220--7232},
}
RNED: Rotary Number Encoding and Decoding for Medical VLMs: Fengbei Liu,

Sunwoo Kwak,

Nusrat Nizam,

Ilan Richter,

Ashley Beecy,

Jayant Raikhelkar,

Deborah Estrin,

Mert R. Sabuncu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Fengbei and Kwak, Sunwoo and Nizam, Nusrat and Richter, Ilan and Beecy, Ashley and Raikhelkar, Jayant and Estrin, Deborah and Sabuncu, Mert R.},
  title     = {{RNED}: Rotary Number Encoding and Decoding for Medical {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13722--13731},
}
LumiMotion: Improving Gaussian Relighting with Scene Dynamics: Joanna Kaleta,

Piotr Wójcik,

Kacper Marzol,

Tomasz Trzcinski,

Kacper Kania,

Marek Kowalski; [pdf] [supp]
[bibtex]
@InProceedings{Kaleta_2026_CVPR,
  author    = {Kaleta, Joanna and W{\'o}jcik, Piotr and Marzol, Kacper and Trzcinski, Tomasz and Kania, Kacper and Kowalski, Marek},
  title     = {{LumiMotion}: Improving {Gaussian} Relighting with Scene Dynamics},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37311--37321},
}
Distribution-Aligned Multimodal Fusion for Robust Object Detection: Xiaohui Hao,

Yanglin Pu,

Yongjun Wang,

Rui She; [pdf]
[bibtex]
@InProceedings{Hao_2026_CVPR,
  author    = {Hao, Xiaohui and Pu, Yanglin and Wang, Yongjun and She, Rui},
  title     = {Distribution-Aligned Multimodal Fusion for Robust Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25494--25503},
}
CogniEdit: Dense Gradient Flow Optimization for Fine-Grained Image Editing: Yan Li,

Lin Liu,

Xiaopeng Zhang,

Wei Xue,

Wenhan Luo,

Yike Guo,

Qi Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yan and Liu, Lin and Zhang, Xiaopeng and Xue, Wei and Luo, Wenhan and Guo, Yike and Tian, Qi},
  title     = {{CogniEdit}: Dense Gradient Flow Optimization for Fine-Grained Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1145--1154},
}
BiOTPrompt: Bidirectional Optimal Transport Guided Prompting for Disease Evolution-aware Radiology Report Generation: Tengfei Liu,

Yijian Fan,

Boyue Wang,

Yongli Hu,

Mingjie Li,

Jinghua Li,

Junbin Gao,

Xiaojun Chang,

Zhihui Li,

Baocai Yin; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Tengfei and Fan, Yijian and Wang, Boyue and Hu, Yongli and Li, Mingjie and Li, Jinghua and Gao, Junbin and Chang, Xiaojun and Li, Zhihui and Yin, Baocai},
  title     = {{BiOTPrompt}: Bidirectional Optimal Transport Guided Prompting for Disease Evolution-aware Radiology Report Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13755--13765},
}
D2Dewarp: Dual Dimensions Geometric Representation Learning Based Document Image Dewarping: Heng Li,

Xiangping Wu,

Qingcai Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Heng and Wu, Xiangping and Chen, Qingcai},
  title     = {{D2Dewarp}: Dual Dimensions Geometric Representation Learning Based Document Image Dewarping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34734--34744},
}
SegMoTE: Token-Level Mixture of Experts for Medical Image Segmentation: Yujie Lu,

Jingwen Li,

Sibo Ju,

Yanzhou Su,

He Yao,

Yisong Liu,

Min Zhu,

Junlong Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Yujie and Li, Jingwen and Ju, Sibo and Su, Yanzhou and Yao, He and Liu, Yisong and Zhu, Min and Cheng, Junlong},
  title     = {{SegMoTE}: Token-Level Mixture of Experts for Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36332--36342},
}
DRiffusion: Draft-and-Refine Process Parallelizes Diffusion Models with Ease: Runsheng Bai,

Chengyu Zhang,

Yangdong Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Runsheng and Zhang, Chengyu and Deng, Yangdong},
  title     = {{DRiffusion}: Draft-and-Refine Process Parallelizes Diffusion Models with Ease},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16551--16560},
}
From Selection to Scheduling: Federated Geometry-Aware Correction Makes Exemplar Replay Work Better under Continual Dynamic Heterogeneity: Zhuang Qi,

Ying-Peng Tang,

Lei Meng,

Guoqing Chao,

Lei Wu,

Han Yu,

Xiangxu Meng; [pdf] [arXiv]
[bibtex]
@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Zhuang and Tang, Ying-Peng and Meng, Lei and Chao, Guoqing and Wu, Lei and Yu, Han and Meng, Xiangxu},
  title     = {From Selection to Scheduling: Federated Geometry-Aware Correction Makes Exemplar Replay Work Better under Continual Dynamic Heterogeneity},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17494--17504},
}
STAR: Test-Time Adaptation Can Enhance Universal Prompt Learning for Vision-Language Models: Yiwei Fu,

Hui Wan,

Xiao Luo,

Minghua Deng; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Yiwei and Wan, Hui and Luo, Xiao and Deng, Minghua},
  title     = {{STAR}: Test-Time Adaptation Can Enhance Universal Prompt Learning for Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31482--31492},
}
Anti-Degradation Lifelong Multi-View Clustering: Xingfeng Li,

Hao Pan,

Honglin Yuan,

Yuan Sun,

Xujian Zhao,

Jiaqi Lin,

Zhenwen Ren; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xingfeng and Pan, Hao and Yuan, Honglin and Sun, Yuan and Zhao, Xujian and Lin, Jiaqi and Ren, Zhenwen},
  title     = {Anti-Degradation Lifelong Multi-View Clustering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8749--8759},
}
FedSDR: Federated Graph Learning with Structural Noise Detection and Reconstruction: Jiaqi Liu,

Zihan Tan,

Guancheng Wan,

Wenke Huang,

He Li,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jiaqi and Tan, Zihan and Wan, Guancheng and Huang, Wenke and Li, He and Ye, Mang},
  title     = {{FedSDR}: Federated Graph Learning with Structural Noise Detection and Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3379--3389},
}
SpeeDe3DGS: Speedy Deformable 3D Gaussian Splatting with Temporal Pruning and Motion Grouping: Allen Tu,

Haiyang Ying,

Alex Hanson,

Yonghan Lee,

Tom Goldstein,

Matthias Zwicker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tu_2026_CVPR,
  author    = {Tu, Allen and Ying, Haiyang and Hanson, Alex and Lee, Yonghan and Goldstein, Tom and Zwicker, Matthias},
  title     = {{SpeeDe3DGS}: Speedy Deformable {3D} {Gaussian} Splatting with Temporal Pruning and Motion Grouping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26083--26093},
}
Semantic Context Matters: Improving Conditioning for Autoregressive Models: Dongyang Jin,

Ryan Xu,

Jianhao Zeng,

Rui Lan,

Yancheng Bai,

Lei Sun,

Xiangxiang Chu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
  author    = {Jin, Dongyang and Xu, Ryan and Zeng, Jianhao and Lan, Rui and Bai, Yancheng and Sun, Lei and Chu, Xiangxiang},
  title     = {Semantic Context Matters: Improving Conditioning for Autoregressive Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30402--30413},
}
SRA-Det: Learning Omni-Grained Open-Vocabulary Detection Beyond Category Names: Li Yang,

Boyu Cai,

Wei Liu,

Yan Wang,

Chunfeng Yuan,

Bing Li,

Weiming Hu; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Li and Cai, Boyu and Liu, Wei and Wang, Yan and Yuan, Chunfeng and Li, Bing and Hu, Weiming},
  title     = {{SRA-Det}: Learning Omni-Grained Open-Vocabulary Detection Beyond Category Names},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27611--27620},
}
ReScene4D: Temporally Consistent Semantic Instance Segmentation of Evolving Indoor 3D Scenes: Emily Steiner,

Jianhao Zheng,

Henry Howard-Jenkins,

Chris Xie,

Iro Armeni; [pdf] [supp]
[bibtex]
@InProceedings{Steiner_2026_CVPR,
  author    = {Steiner, Emily and Zheng, Jianhao and Howard-Jenkins, Henry and Xie, Chris and Armeni, Iro},
  title     = {{ReScene4D}: Temporally Consistent Semantic Instance Segmentation of Evolving Indoor {3D} Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10710--10720},
}
Thinking with Frames: Generative Video Distortion Evaluation via Frame Reward Model: Yuan Wang,

Borui Liao,

Huijuan Huang,

Jinda Lu,

Ouxiang Li,

Kuien Liu,

Meng Wang,

Xiang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuan and Liao, Borui and Huang, Huijuan and Lu, Jinda and Li, Ouxiang and Liu, Kuien and Wang, Meng and Wang, Xiang},
  title     = {Thinking with Frames: Generative Video Distortion Evaluation via Frame Reward Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4526--4536},
}
LiteVGGT: Boosting Vanilla VGGT via Geometry-aware Cached Token Merging: Zhijian Shu,

Cheng Lin,

Tao Xie,

Wei Yin,

Ben Li,

Zhiyuan Pu,

Weize Li,

Yao Yao,

Xun Cao,

Xiaoyang Guo,

Xiao-Xiao Long; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shu_2026_CVPR,
  author    = {Shu, Zhijian and Lin, Cheng and Xie, Tao and Yin, Wei and Li, Ben and Pu, Zhiyuan and Li, Weize and Yao, Yao and Cao, Xun and Guo, Xiaoyang and Long, Xiao-Xiao},
  title     = {{LiteVGGT}: Boosting Vanilla {VGGT} via Geometry-aware Cached Token Merging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36422--36432},
}
Lipschitz Optimization for Formal Verification of Homographies: Jean-Guillaume Durand,

Panagiotis Kouvaros,

Maxime Gariel,

Alessio Lomuscio; [pdf] [supp]
[bibtex]
@InProceedings{Durand_2026_CVPR,
  author    = {Durand, Jean-Guillaume and Kouvaros, Panagiotis and Gariel, Maxime and Lomuscio, Alessio},
  title     = {Lipschitz Optimization for Formal Verification of Homographies},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13306--13315},
}
Adapter Shield: A Unified Framework with Built-in Authentication for Preventing Unauthorized Zero-Shot Image-to-Image Generation: Jun Jia,

Hongyi Miao,

Yingjie Zhou,

Wangqiu Zhou,

Jianbo Zhang,

Linhan Cao,

Dandan Zhu,

Hua Yang,

Xiongkuo Min,

Wei Sun,

Guangtao Zhai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Jun and Miao, Hongyi and Zhou, Yingjie and Zhou, Wangqiu and Zhang, Jianbo and Cao, Linhan and Zhu, Dandan and Yang, Hua and Min, Xiongkuo and Sun, Wei and Zhai, Guangtao},
  title     = {Adapter Shield: A Unified Framework with Built-in Authentication for Preventing Unauthorized Zero-Shot Image-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30120--30129},
}
AsymLoc: Towards Asymmetric Feature Matching for Efficient Visual Localization: Mohammad Omama,

Gabriele Berton,

Eric Foxlin,

Yelin Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Omama_2026_CVPR,
  author    = {Omama, Mohammad and Berton, Gabriele and Foxlin, Eric and Kim, Yelin},
  title     = {{AsymLoc}: Towards Asymmetric Feature Matching for Efficient Visual Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26441--26451},
}
Med-CMR: A Fine-Grained Benchmark Integrating Visual Evidence and Clinical Logic for Medical Complex Multimodal Reasoning: Haozhen Gong,

Xiaozhong Ji,

Yuansen Liu,

Wenbin Wu,

Xiaoxiao Yan,

Jingjing Liu,

Kai Wu,

Jiazhen Pan,

Bailiang Jian,

Jiangning Zhang,

Xiaobin Hu,

Hongwei Bran Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2026_CVPR,
  author    = {Gong, Haozhen and Ji, Xiaozhong and Liu, Yuansen and Wu, Wenbin and Yan, Xiaoxiao and Liu, Jingjing and Wu, Kai and Pan, Jiazhen and Jian, Bailiang and Zhang, Jiangning and Hu, Xiaobin and Li, Hongwei Bran},
  title     = {{Med-CMR}: A Fine-Grained Benchmark Integrating Visual Evidence and Clinical Logic for Medical Complex Multimodal Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41224--41234},
}
PlannerRFT: Reinforcing Diffusion Planners through Closed-Loop and Sample-Efficient Fine-Tuning: Hongchen Li,

Tianyu Li,

Jiazhi Yang,

Mingyang Shang,

Gaoqiang Wu,

Caojun Wang,

Haochen Tian,

Zengrong Lin,

Zhihui Hao,

XianPeng Lang,

Jia Hu,

Hongyang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Hongchen and Li, Tianyu and Yang, Jiazhi and Shang, Mingyang and Wu, Gaoqiang and Wang, Caojun and Tian, Haochen and Lin, Zengrong and Hao, Zhihui and Lang, XianPeng and Hu, Jia and Li, Hongyang},
  title     = {{PlannerRFT}: Reinforcing Diffusion Planners through Closed-Loop and Sample-Efficient Fine-Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24929--24938},
}
Glove2Hand: Synthesizing Natural Hand-Object Interaction from Multi-Modal Sensing Gloves: Xinyu Zhang,

Ziyi Kou,

Chuan Qin,

Mia Huang,

Ergys Ristani,

Ankit Kumar,

Lele Chen,

Kun He,

Abdeslam Boularias,

Li Guan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xinyu and Kou, Ziyi and Qin, Chuan and Huang, Mia and Ristani, Ergys and Kumar, Ankit and Chen, Lele and He, Kun and Boularias, Abdeslam and Guan, Li},
  title     = {{Glove2Hand}: Synthesizing Natural Hand-Object Interaction from Multi-Modal Sensing Gloves},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1829--1840},
}
ArtiMuse: Fine-Grained Image Aesthetics Assessment with Joint Scoring and Expert-Level Understanding: Shuo Cao,

Nan Ma,

Jiayang Li,

Xiaohui Li,

Lihao Shao,

Kaiwen Zhu,

Yu Zhou,

Yuandong Pu,

Jiarui Wu,

Jiaquan Wang,

Bo Qu,

Wenhai Wang,

Yu Qiao,

Dajuin Yao,

Yihao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Shuo and Ma, Nan and Li, Jiayang and Li, Xiaohui and Shao, Lihao and Zhu, Kaiwen and Zhou, Yu and Pu, Yuandong and Wu, Jiarui and Wang, Jiaquan and Qu, Bo and Wang, Wenhai and Qiao, Yu and Yao, Dajuin and Liu, Yihao},
  title     = {{ArtiMuse}: Fine-Grained Image Aesthetics Assessment with Joint Scoring and Expert-Level Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15313--15322},
}
Unifying Precise Keyframes and Semantic Control via Multi-level Diffusion: Linjun Wu,

Jiejia Yu,

Leyang Jin,

He Wang,

Bowen Zheng,

Xu Yang,

Hao Jiang,

Fei Xia,

Fei Ling,

Jun Deng,

Xiaogang Jin; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Linjun and Yu, Jiejia and Jin, Leyang and Wang, He and Zheng, Bowen and Yang, Xu and Jiang, Hao and Xia, Fei and Ling, Fei and Deng, Jun and Jin, Xiaogang},
    title     = {Unifying Precise Keyframes and Semantic Control via Multi-level Diffusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {23473--23483}
}
Improving Sparse Autoencoder with Dynamic Attention: Dongsheng Wang,

Jinsen Zhang,

Dawei Su,

Hui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Dongsheng and Zhang, Jinsen and Su, Dawei and Huang, Hui},
    title     = {Improving Sparse Autoencoder with Dynamic Attention},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41996--42006}
}
SwitchCraft: Training-Free Multi-Event Video Generation with Attention Controls: Qianxun Xu,

Chenxi Song,

Yujun Cai,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Qianxun and Song, Chenxi and Cai, Yujun and Zhang, Chi},
    title     = {{SwitchCraft}: Training-Free Multi-Event Video Generation with Attention Controls},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29136--29145}
}
DUO-VSR: Dual-Stream Distillation for One-Step Video Super-Resolution: Zhengyao Lv,

Menghan Xia,

Xintao Wang,

Kwan-Yee K. Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2026_CVPR,
    author    = {Lv, Zhengyao and Xia, Menghan and Wang, Xintao and Wong, Kwan-Yee K.},
    title     = {{DUO-VSR}: Dual-Stream Distillation for One-Step Video Super-Resolution},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16333--16344}
}
HiFICL: High-Fidelity In-Context Learning for Multimodal Tasks: Xiaoyu Li,

Yuhang Liu,

Xuanshuo Kang,

Zheng Luo,

Fangqi Lou,

Xiaohua Wu,

Zihan Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Xiaoyu and Liu, Yuhang and Kang, Xuanshuo and Luo, Zheng and Lou, Fangqi and Wu, Xiaohua and Xiong, Zihan},
    title     = {{HiFICL}: High-Fidelity In-Context Learning for Multimodal Tasks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {3069--3078}
}
Hear you are: Teaching LLMs Spatial Reasoning with Vision and Spatial Sound: Hyeonggon Ryu,

Joon Son Chung,

David Harwath; [pdf] [supp]
[bibtex]
@InProceedings{Ryu_2026_CVPR,
    author    = {Ryu, Hyeonggon and Chung, Joon Son and Harwath, David},
    title     = {Hear you are: Teaching {LLMs} Spatial Reasoning with Vision and Spatial Sound},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38606--38615}
}
BUSSARD: Normalizing Flows for Bijective Universal Scene-Specific Anomalous Relationship Detection: Melissa Schween,

Mathis Kruse,

Bodo Rosenhahn; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schween_2026_CVPR,
    author    = {Schween, Melissa and Kruse, Mathis and Rosenhahn, Bodo},
    title     = {{BUSSARD}: Normalizing Flows for Bijective Universal Scene-Specific Anomalous Relationship Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28512--28523}
}
Energy Waveify and Redistribution for Test-Time Adaptation: A Control System Perspective: Zhenbin Wang,

Lei Zhang,

Lituan Wang,

Zhenwei Zhang,

Guangwu Qian,

Yan Wang,

Wei Huang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Zhenbin and Zhang, Lei and Wang, Lituan and Zhang, Zhenwei and Qian, Guangwu and Wang, Yan and Huang, Wei},
    title     = {Energy Waveify and Redistribution for Test-Time Adaptation: A Control System Perspective},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15040--15049}
}
Ar2Can: An Architect and an Artist Leveraging a Canvas for Multi-Human Generation: Shubhankar Borse,

Phuc Pham,

Farzad Farhadzadeh,

Seokeon Choi,

Phong Nguyen,

Anh Tran,

Sungrack Yun,

Munawar Hayat,

Fatih Porikli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Borse_2026_CVPR,
    author    = {Borse, Shubhankar and Pham, Phuc and Farhadzadeh, Farzad and Choi, Seokeon and Nguyen, Phong and Tran, Anh and Yun, Sungrack and Hayat, Munawar and Porikli, Fatih},
    title     = {{Ar2Can}: An Architect and an Artist Leveraging a Canvas for Multi-Human Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {550--560}
}
OlmoEarth: Stable Latent Image Modeling for Multimodal Earth Observation: Henry Herzog,

Favyen Bastani,

Yawen Zhang,

Gabriel Tseng,

Joseph Redmon,

Hadrien Sablon,

Ryan Park,

Jacob Morrison,

Alexandra Buraczynski,

Karen Farley,

Josh Hansen,

Andrew Howe,

Patrick Alan Johnson,

Mark Otterlee,

Ted Schmitt,

Hunter Pitelka,

Stephen Daspit,

Rachel Ratner,

Christopher Wilhelm,

Sebastian Wood,

Mike Jacobi,

Hannah Kerner,

Evan Shelhamer,

Ali Farhadi,

Ranjay Krishna,

Patrick Beukema; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Herzog_2026_CVPR,
    author    = {Herzog, Henry and Bastani, Favyen and Zhang, Yawen and Tseng, Gabriel and Redmon, Joseph and Sablon, Hadrien and Park, Ryan and Morrison, Jacob and Buraczynski, Alexandra and Farley, Karen and Hansen, Josh and Howe, Andrew and Johnson, Patrick Alan and Otterlee, Mark and Schmitt, Ted and Pitelka, Hunter and Daspit, Stephen and Ratner, Rachel and Wilhelm, Christopher and Wood, Sebastian and Jacobi, Mike and Kerner, Hannah and Shelhamer, Evan and Farhadi, Ali and Krishna, Ranjay and Beukema, Patrick},
    title     = {{OlmoEarth}: Stable Latent Image Modeling for Multimodal {Earth} Observation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34806--34817}
}
InfiniBench: Infinite Benchmarking for Visual Spatial Reasoning with Customizable Scene Complexity: Haoming Wang,

Qiyao Xue,

Wei Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Haoming and Xue, Qiyao and Gao, Wei},
    title     = {{InfiniBench}: Infinite Benchmarking for Visual Spatial Reasoning with Customizable Scene Complexity},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21594--21605}
}
DICArt: Advancing Category-level Articulated Object Pose Estimation in Discrete State-Spaces: Li Zhang,

Mingyu Mei,

Ailing Wang,

Xianhui Meng,

Yan Zhong,

Xinyuan Song,

Liu Liu,

Rujing Wang,

Zaixing He,

Cewu Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Li and Mei, Mingyu and Wang, Ailing and Meng, Xianhui and Zhong, Yan and Song, Xinyuan and Liu, Liu and Wang, Rujing and He, Zaixing and Lu, Cewu},
    title     = {{DICArt}: Advancing Category-level Articulated Object Pose Estimation in Discrete State-Spaces},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {4687--4697}
}
Probabilistic Concept Graph Reasoning for Multimodal Misinformation Detection: Ruichao Yang,

Wei Gao,

Xiaobin Zhu,

Jing Ma,

Hongzhan Lin,

Ziyang Luo,

Bo-Wen Zhang,

Xu-Cheng Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Ruichao and Gao, Wei and Zhu, Xiaobin and Ma, Jing and Lin, Hongzhan and Luo, Ziyang and Zhang, Bo-Wen and Yin, Xu-Cheng},
    title     = {Probabilistic Concept Graph Reasoning for Multimodal Misinformation Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19107--19118}
}
AffordGen: Generating Diverse Demonstrations for Generalizable Object Manipulation with Affordance Correspondence: Jiawei Zhang,

Kaizhe Hu,

Yingqian Huang,

Yuanchen Ju,

Zhengrong Xue,

Huazhe Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Jiawei and Hu, Kaizhe and Huang, Yingqian and Ju, Yuanchen and Xue, Zhengrong and Xu, Huazhe},
    title     = {{AffordGen}: Generating Diverse Demonstrations for Generalizable Object Manipulation with Affordance Correspondence},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15966--15975}
}
OmniBrainBench: A Comprehensive Multimodal Benchmark for Brain Imaging Analysis Across Multi-stage Clinical Tasks: Zhihao Peng,

Cheng Wang,

Shengyuan Liu,

Zhiying Liang,

Zanting Ye,

Min Jie Ju,

Peter YM Woo,

Yixuan Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
    author    = {Peng, Zhihao and Wang, Cheng and Liu, Shengyuan and Liang, Zhiying and Ye, Zanting and Ju, Min Jie and Woo, Peter YM and Yuan, Yixuan},
    title     = {{OmniBrainBench}: A Comprehensive Multimodal Benchmark for Brain Imaging Analysis Across Multi-stage Clinical Tasks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42732--42743}
}
QuCNet: Quantum Deep Learning Driven Multi-Circuit Network for Remote Sensing Image Classification: Komal Komal,

Mukul Gupta,

Saumya Singh,

Santosh Kumar Vipparthi,

C.C. Reddy,

Subrahmanyam Murala; [pdf] [supp]
[bibtex]
@InProceedings{Komal_2026_CVPR,
    author    = {Komal, Komal and Gupta, Mukul and Singh, Saumya and Vipparthi, Santosh Kumar and Reddy, C. C. and Murala, Subrahmanyam},
    title     = {{QuCNet}: Quantum Deep Learning Driven Multi-Circuit Network for Remote Sensing Image Classification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20253--20262}
}
Beyond Semantic Search: Towards Referential Anchoring in Composed Image Retrieval: Yuxin Yang,

Yinan Zhou,

Yuxin Chen,

Ziqi Zhang,

Zongyang Ma,

Chunfeng Yuan,

Bing Li,

Jun Gao,

Weiming Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Yuxin and Zhou, Yinan and Chen, Yuxin and Zhang, Ziqi and Ma, Zongyang and Yuan, Chunfeng and Li, Bing and Gao, Jun and Hu, Weiming},
    title     = {Beyond Semantic Search: Towards Referential Anchoring in Composed Image Retrieval},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31155--31165}
}
UPLiFT: Efficient Pixel-Dense Feature Upsampling with Local Attenders: Matthew Walmer,

Saksham Suri,

Anirud Aggarwal,

Abhinav Shrivastava; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Walmer_2026_CVPR,
    author    = {Walmer, Matthew and Suri, Saksham and Aggarwal, Anirud and Shrivastava, Abhinav},
    title     = {{UPLiFT}: Efficient Pixel-Dense Feature Upsampling with Local Attenders},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41288--41298}
}
Rethinking MLLM Itself as a Segmenter with a Single Segmentation Token: Anqi Zhang,

Xiaokang Ji,

Guangyu Gao,

Jianbo Jiao,

Chi Harold Liu,

Yunchao Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Anqi and Ji, Xiaokang and Gao, Guangyu and Jiao, Jianbo and Liu, Chi Harold and Wei, Yunchao},
    title     = {Rethinking {MLLM} Itself as a Segmenter with a Single Segmentation Token},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19196--19207}
}
Scone: Bridging Composition and Distinction in Subject-Driven Image Generation via Unified Understanding-Generation Modeling: Yuran Wang,

Bohan Zeng,

Chengzhuo Tong,

Wenxuan Liu,

Yang Shi,

Xiaochen Ma,

Hao Liang,

Yuanxing Zhang,

Wentao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yuran and Zeng, Bohan and Tong, Chengzhuo and Liu, Wenxuan and Shi, Yang and Ma, Xiaochen and Liang, Hao and Zhang, Yuanxing and Zhang, Wentao},
    title     = {Scone: Bridging Composition and Distinction in Subject-Driven Image Generation via Unified Understanding-Generation Modeling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {7773--7783}
}
Temporal Imbalance of Positive and Negative Supervision in Class-Incremental Learning: Jinge Ma,

Fengqing Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
    author    = {Ma, Jinge and Zhu, Fengqing},
    title     = {Temporal Imbalance of Positive and Negative Supervision in Class-Incremental Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32299--32308}
}
Nano-EmoX: Unifying Multimodal Emotional Intelligence from Perception to Empathy: Jiahao Huang,

Fengyan Lin,

Xuechao Yang,

Chen Feng,

Kexin Zhu,

Xu Yang,

Zhide Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Jiahao and Lin, Fengyan and Yang, Xuechao and Feng, Chen and Zhu, Kexin and Yang, Xu and Chen, Zhide},
    title     = {{Nano-EmoX}: Unifying Multimodal Emotional Intelligence from Perception to Empathy},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22986--22997}
}
Global-Aware Edge Prioritization for Pose Graph Initialization: Tong Wei,

Giorgos Tolias,

Jiri Matas,

Daniel Barath; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
    author    = {Wei, Tong and Tolias, Giorgos and Matas, Jiri and Barath, Daniel},
    title     = {Global-Aware Edge Prioritization for Pose Graph Initialization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {28642--28651}
}
AdaptVision: Efficient Vision-Language Models via Adaptive Visual Acquisition: Zichuan Lin,

Yicheng Liu,

Yang Yang,

Lvfang Tao,

Deheng Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
    author    = {Lin, Zichuan and Liu, Yicheng and Yang, Yang and Tao, Lvfang and Ye, Deheng},
    title     = {{AdaptVision}: Efficient Vision-Language Models via Adaptive Visual Acquisition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11923--11932}
}
Beyond Geometry: Artistic Disparity Synthesis for Immersive 2D-to-3D: Ping Chen,

Zezhou Chen,

Xingpeng Zhang,

Yanlin Qian,

Huan Hu,

Xiang Liu,

Zipeng Wang,

Xin Wang,

Zhaoxiang Liu,

Kai Wang,

Shiguo Lian; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Ping and Chen, Zezhou and Zhang, Xingpeng and Qian, Yanlin and Hu, Huan and Liu, Xiang and Wang, Zipeng and Wang, Xin and Liu, Zhaoxiang and Wang, Kai and Lian, Shiguo},
    title     = {Beyond Geometry: Artistic Disparity Synthesis for Immersive {2D-to-3D}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {27114--27123}
}
Geometry-driven OOD Detectors Are Class-Incremental Learners: Wangwang Jia,

Zijian Gao,

Tianjiao Wan,

Yuan Cao,

Yong Dou,

Kele Xu; [pdf] [supp]
[bibtex]
@InProceedings{Jia_2026_CVPR,
    author    = {Jia, Wangwang and Gao, Zijian and Wan, Tianjiao and Cao, Yuan and Dou, Yong and Xu, Kele},
    title     = {Geometry-driven {OOD} Detectors Are Class-Incremental Learners},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34638--34649}
}
ReAlign: Generalizable Image Forgery Detection via Reasoning-Aligned Representation: Qing Huang,

Zhipei Xu,

Xuanyu Zhang,

Xiangyu Yu,

Jian Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Qing and Xu, Zhipei and Zhang, Xuanyu and Yu, Xiangyu and Zhang, Jian},
    title     = {{ReAlign}: Generalizable Image Forgery Detection via Reasoning-Aligned Representation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21295--21305}
}
Enhancing Unregistered Hyperspectral Image Super-Resolution via Unmixing-based Abundance Fusion Learning: Yingkai Zhang,

Tao Zhang,

Jing Nie,

Ying Fu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Yingkai and Zhang, Tao and Nie, Jing and Fu, Ying},
    title     = {Enhancing Unregistered Hyperspectral Image Super-Resolution via Unmixing-based Abundance Fusion Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41573--41583}
}
Video-Only ToM: Enhancing Theory of Mind in Multimodal Large Language Models: Siqi Liu,

Xinyang Li,

Bochao Zou,

Junbao Zhuo,

Huimin Ma,

Jiansheng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Siqi and Li, Xinyang and Zou, Bochao and Zhuo, Junbao and Ma, Huimin and Chen, Jiansheng},
    title     = {Video-Only {ToM}: Enhancing Theory of Mind in Multimodal Large Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19208--19218}
}
Saliency-Driven Token Merging for Vision Transformers: Weiying Xie,

Xiaoyu Chen,

Xin Zhang,

Chenhe Hao,

Jitao Ma,

Yunsong Li,

Leyuan Fang; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
    author    = {Xie, Weiying and Chen, Xiaoyu and Zhang, Xin and Hao, Chenhe and Ma, Jitao and Li, Yunsong and Fang, Leyuan},
    title     = {Saliency-Driven Token Merging for Vision Transformers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32184--32193}
}
FastGS: Training 3D Gaussian Splatting in 100 Seconds: Shiwei Ren,

Tianci Wen,

Yongchun Fang,

Biao Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR,
    author    = {Ren, Shiwei and Wen, Tianci and Fang, Yongchun and Lu, Biao},
    title     = {{FastGS}: Training {3D} {Gaussian} Splatting in 100 Seconds},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26094--26103}
}
GaussianZoom: Progressive Zoom-in Generative 3D Gaussian Splatting with Geometric and Semantic Guidance: Jiale Shi,

Jiarui Hu,

Zesong Yang,

Kaixuan Luan,

Hujun Bao,

Zhaopeng Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
    author    = {Shi, Jiale and Hu, Jiarui and Yang, Zesong and Luan, Kaixuan and Bao, Hujun and Cui, Zhaopeng},
    title     = {{GaussianZoom}: Progressive Zoom-in Generative {3D} {Gaussian} Splatting with Geometric and Semantic Guidance},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11850--11859}
}
An Instance-Centric Panoptic Occupancy Prediction Benchmark for Autonomous Driving: Yi Feng,

Junwu E,

Zizhan Guo,

Yu Ma,

Hanli Wang,

Rui Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
    author    = {Feng, Yi and E, Junwu and Guo, Zizhan and Ma, Yu and Wang, Hanli and Fan, Rui},
    title     = {An Instance-Centric Panoptic Occupancy Prediction Benchmark for Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14219--14228}
}
Learning from Itself: Mining Internal Knowledge from Vision Language Models for Continual Learning: Yizheng Gong,

Siyue Yu,

Waleed Al-Nuaimy,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Gong_2026_CVPR,
    author    = {Gong, Yizheng and Yu, Siyue and Al-Nuaimy, Waleed and Xiao, Jimin},
    title     = {Learning from Itself: Mining Internal Knowledge from Vision Language Models for Continual Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {10830--10839}
}
Reinforcing Video Object Segmentation to Think before it Segments: Sitong Gong,

Yunzhi Zhuge,

Lu Zhang,

Jiazuo Yu,

Pingping Zhang,

Xu Jia,

Huchuan Lu; [pdf] [supp]
[bibtex]
@InProceedings{Gong_2026_CVPR,
    author    = {Gong, Sitong and Zhuge, Yunzhi and Zhang, Lu and Yu, Jiazuo and Zhang, Pingping and Jia, Xu and Lu, Huchuan},
    title     = {Reinforcing Video Object Segmentation to Think before it Segments},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {3835--3844}
}
GazeOnce360: Fisheye-Based 360deg Multi-Person Gaze Estimation with Global-Local Feature Fusion: Zhuojiang Cai,

Zhenghui Sun,

Feng Lu; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2026_CVPR,
    author    = {Cai, Zhuojiang and Sun, Zhenghui and Lu, Feng},
    title     = {{GazeOnce360}: Fisheye-Based {$360^{\circ}$} Multi-Person Gaze Estimation with Global-Local Feature Fusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {12312--12321}
}
Edit-As-Act: Goal-Regressive Planning for Open-Vocabulary 3D Indoor Scene Editing: Seongrae Noh,

SeungWon Seo,

Gyeong-Moon Park,

HyeongYeop Kang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Noh_2026_CVPR,
    author    = {Noh, Seongrae and Seo, SeungWon and Park, Gyeong-Moon and Kang, HyeongYeop},
    title     = {{Edit-As-Act}: Goal-Regressive Planning for Open-Vocabulary {3D} Indoor Scene Editing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19864--19873}
}
Scene-Centric Unsupervised Video Panoptic Segmentation: Christoph Reich,

Oliver Hahn,

Nikita Araslanov,

Laura Leal-Taixé,

Christian Rupprecht,

Daniel Cremers,

Stefan Roth; [pdf] [supp]
[bibtex]
@InProceedings{Reich_2026_CVPR,
    author    = {Reich, Christoph and Hahn, Oliver and Araslanov, Nikita and Leal-Taix{\'e}, Laura and Rupprecht, Christian and Cremers, Daniel and Roth, Stefan},
    title     = {Scene-Centric Unsupervised Video Panoptic Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {10753--10765}
}
A Semantically Disentangled Unified Model for Multi-category 3D Anomaly Detection: SuYeon Kim,

Wongyu Lee,

MyeongAh Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
    author    = {Kim, SuYeon and Lee, Wongyu and Cho, MyeongAh},
    title     = {A Semantically Disentangled Unified Model for Multi-category {3D} Anomaly Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33036--33045}
}
AVFakeBench: A Comprehensive Audio-Video Forgery Detection Benchmark for AV-LMMs: Shuhan Xia,

Peipei Li,

Xuannan Liu,

Dongsen Zhang,

Xinyu Guo,

Zekun Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
    author    = {Xia, Shuhan and Li, Peipei and Liu, Xuannan and Zhang, Dongsen and Guo, Xinyu and Li, Zekun},
    title     = {{AVFakeBench}: A Comprehensive Audio-Video Forgery Detection Benchmark for {AV-LMMs}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35416--35426}
}
Continual Learning for fMRI-Based Brain Disorder Diagnosis via Functional Connectivity Matrices Generative Replay: Qianyu Chen,

Shujian Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Qianyu and Yu, Shujian},
    title     = {Continual Learning for {fMRI}-Based Brain Disorder Diagnosis via Functional Connectivity Matrices Generative Replay},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25099--25109}
}
B$^3$-Seg: Camera-Free, Training-Free 3DGS Segmentation via Analytic EIG and Beta-Bernoulli Bayesian Updates: Hiromichi Kamata,

Samuel Arthur Munro,

Fuminori Homma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kamata_2026_CVPR,
    author    = {Kamata, Hiromichi and Munro, Samuel Arthur and Homma, Fuminori},
    title     = {{B$^3$-Seg}: Camera-Free, Training-Free {3DGS} Segmentation via Analytic {EIG} and {Beta-Bernoulli} {Bayesian} Updates},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26867--26876}
}
Towards Uncertainty-aware Unsupervised Domain Adaptation for Videos and Time-Series with Causal Optimal Transport: Khushboo Mishra,

Varun Trivedi,

Tanima Dutta; [pdf] [supp]
[bibtex]
@InProceedings{Mishra_2026_CVPR,
    author    = {Mishra, Khushboo and Trivedi, Varun and Dutta, Tanima},
    title     = {Towards Uncertainty-aware Unsupervised Domain Adaptation for Videos and Time-Series with Causal Optimal Transport},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29421--29430}
}
What Makes Good Synthetic Training Data for Zero-Shot Stereo Matching?: David Yan,

Alexander Raistrick,

Jia Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
    author    = {Yan, David and Raistrick, Alexander and Deng, Jia},
    title     = {What Makes Good Synthetic Training Data for Zero-Shot Stereo Matching?},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34171--34180}
}
Improving Text-to-Image Generation with Intrinsic Self-Confidence Rewards: Seungwook Kim,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
    author    = {Kim, Seungwook and Cho, Minsu},
    title     = {Improving Text-to-Image Generation with Intrinsic Self-Confidence Rewards},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14832--14843}
}
Retrieve-to-Restore: Efficient All-in-One Image Restoration with a Retrieval-Based Degradation Bank: Chenxu Wang,

Kai Zhang,

Jian Yang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Chenxu and Zhang, Kai and Yang, Jian},
    title     = {Retrieve-to-Restore: Efficient All-in-One Image Restoration with a Retrieval-Based Degradation Bank},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1277--1287}
}
GenieDrive: Towards Physics-Aware Driving World Model with 4D Occupancy Guided Video Generation: Zhenya Yang,

Zhe Liu,

Yuxiang Lu,

Liping Hou,

Chenxuan Miao,

Siyi Peng,

Bailan Feng,

Xiang Bai,

Hengshuang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Zhenya and Liu, Zhe and Lu, Yuxiang and Hou, Liping and Miao, Chenxuan and Peng, Siyi and Feng, Bailan and Bai, Xiang and Zhao, Hengshuang},
    title     = {{GenieDrive}: Towards Physics-Aware Driving World Model with {4D} Occupancy Guided Video Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35680--35690}
}
GeoViS: Geospatially Rewarded Visual Search for Remote Sensing Visual Grounding: Peirong Zhang,

Yidan Zhang,

Luxiao Xu,

Jinliang Lin,

Zonghao Guo,

Fengxiang Wang,

Xue Yang,

Kaiwen Wei,

Lei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Peirong and Zhang, Yidan and Xu, Luxiao and Lin, Jinliang and Guo, Zonghao and Wang, Fengxiang and Yang, Xue and Wei, Kaiwen and Wang, Lei},
    title     = {{GeoViS}: Geospatially Rewarded Visual Search for Remote Sensing Visual Grounding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14335--14345}
}
Human-like Abstract Visual Reasoning via Understanding and Solving Reasoning Loop: Xinwang Chen,

Xiuxing Li,

Qing Li,

Ziyue Zhuang,

Yutong Wu,

Ziyu Li,

Zhuo Wang,

Kai Li,

Jianye Hao,

Xia Wu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Xinwang and Li, Xiuxing and Li, Qing and Zhuang, Ziyue and Wu, Yutong and Li, Ziyu and Wang, Zhuo and Li, Kai and Hao, Jianye and Wu, Xia},
    title     = {Human-like Abstract Visual Reasoning via Understanding and Solving Reasoning Loop},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41235--41244}
}
OccuFly: A 3D Vision Benchmark for Semantic Scene Completion from the Aerial Perspective: Markus Gross,

Sai B. Matha,

Aya Fahmy,

Rui Song,

Daniel Cremers,

Henri Meeß; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gross_2026_CVPR,
    author    = {Gross, Markus and Matha, Sai B. and Fahmy, Aya and Song, Rui and Cremers, Daniel and Mee{\ss}, Henri},
    title     = {{OccuFly}: A {3D} Vision Benchmark for Semantic Scene Completion from the Aerial Perspective},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21474--21485}
}
MotionHiFlow: Text-to-Motion via Hierarchical Flow Matching: Heng Li,

Xiaotong Lin,

Ling-An Zeng,

Yulei Kang,

Shuai Li,

Jian-Fang Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Heng and Lin, Xiaotong and Zeng, Ling-An and Kang, Yulei and Li, Shuai and Hu, Jian-Fang},
    title     = {{MotionHiFlow}: Text-to-Motion via Hierarchical Flow Matching},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {9352--9363}
}
Dynamic Stream Network for Combinatorial Explosion Problem in Deformable Medical Image Registration: Shaochen Bi,

Yuting He,

Weiming Wang,

Hao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bi_2026_CVPR,
    author    = {Bi, Shaochen and He, Yuting and Wang, Weiming and Chen, Hao},
    title     = {Dynamic Stream Network for Combinatorial Explosion Problem in Deformable Medical Image Registration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15649--15658}
}
What Are You Doing? A Closer Look at Controllable Human Video Generation: Emanuele Bugliarello,

Anurag Arnab,

Roni Paiss,

Christy Koh,

Pieter-Jan Kindermans,

Cordelia Schmid; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bugliarello_2026_CVPR,
    author    = {Bugliarello, Emanuele and Arnab, Anurag and Paiss, Roni and Koh, Christy and Kindermans, Pieter-Jan and Schmid, Cordelia},
    title     = {What Are You Doing? {A} Closer Look at Controllable Human Video Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11414--11425}
}
MR-RAG: Multimodal Relevance-Aware Retrieval-Augmented Generation for Medical Visual Question Answering: Xuze Li,

Haozhao Wang,

Zhenyu Huang,

Zhongxu Wang,

Jinghua Zhang,

Ruixuan Li; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Xuze and Wang, Haozhao and Huang, Zhenyu and Wang, Zhongxu and Zhang, Jinghua and Li, Ruixuan},
    title     = {{MR-RAG}: Multimodal Relevance-Aware Retrieval-Augmented Generation for Medical Visual Question Answering},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15010--15019}
}
Semantic-Adaptive Diffusion for Dynamic Spatiotemporal Fusion: Jinsong Zhang,

Ying Qu,

Yuan Liao,

Hairong Qi,

Zhenzhou Shao; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jinsong and Qu, Ying and Liao, Yuan and Qi, Hairong and Shao, Zhenzhou},
  title     = {Semantic-Adaptive Diffusion for Dynamic Spatiotemporal Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12344--12353},
}
OctoMed: Data Recipes for State-of-the-Art Multimodal Medical Reasoning: Timothy Ossowski,

Sheng Zhang,

Qianchu Liu,

Guanghui Qin,

Reuben Tan,

Tristan Naumann,

Junjie Hu,

Hoifung Poon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ossowski_2026_CVPR,
  author    = {Ossowski, Timothy and Zhang, Sheng and Liu, Qianchu and Qin, Guanghui and Tan, Reuben and Naumann, Tristan and Hu, Junjie and Poon, Hoifung},
  title     = {{OctoMed}: Data Recipes for State-of-the-Art Multimodal Medical Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26251--26261},
}
Let it Snow! Animating 3D Gaussian Scenes with Dynamic Weather Effects via Physics-Guided Score Distillation: Gal Fiebelman,

Hadar Averbuch-Elor,

Sagie Benaim; [pdf] [supp]
[bibtex]
@InProceedings{Fiebelman_2026_CVPR,
  author    = {Fiebelman, Gal and Averbuch-Elor, Hadar and Benaim, Sagie},
  title     = {Let it Snow! {Animating} {3D} {Gaussian} Scenes with Dynamic Weather Effects via Physics-Guided Score Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37322--37331},
}
LoD-Loc v3: Generalized Aerial Localization in Dense Cities using Instance Silhouette Alignment: Shuaibang Peng,

Juelin Zhu,

Xia Li,

Kun Yang,

Yu Liu,

Maojun Zhang,

Shen Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Shuaibang and Zhu, Juelin and Li, Xia and Yang, Kun and Liu, Yu and Zhang, Maojun and Yan, Shen},
  title     = {{LoD-Loc} v3: Generalized Aerial Localization in Dense Cities using Instance Silhouette Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12193--12205},
}
Mark4D: Temporally-Consistent Watermarking for 4D Gaussian Splatting: Jaejin Lee,

Minjae Jeong,

Joonhyuk Park,

Yechan Hwang,

Seunghun Baek,

Won Hwa Kim; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Jaejin and Jeong, Minjae and Park, Joonhyuk and Hwang, Yechan and Baek, Seunghun and Kim, Won Hwa},
  title     = {{Mark4D}: Temporally-Consistent Watermarking for {4D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39393--39402},
}
Flow3r: Factored Flow Prediction for Scalable Visual Geometry Learning: Zhongxiao Cong,

Qitao Zhao,

Minsik Jeon,

Shubham Tulsiani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cong_2026_CVPR,
  author    = {Cong, Zhongxiao and Zhao, Qitao and Jeon, Minsik and Tulsiani, Shubham},
  title     = {{Flow3r}: Factored Flow Prediction for Scalable Visual Geometry Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {438--447},
}
Spatial-SAM: Spatially Consistent 3D Electron Microscopy Segmentation with SDF Memory and Semi-Supervised Learning: Yikai Huang,

Renmin Han,

Yuxuan Wang,

Youcheng Cai,

Ligang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Yikai and Han, Renmin and Wang, Yuxuan and Cai, Youcheng and Liu, Ligang},
  title     = {{Spatial-SAM}: Spatially Consistent {3D} Electron Microscopy Segmentation with {SDF} Memory and Semi-Supervised Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22763--22772},
}
Face2Scene: Using Facial Degradation as an Oracle for Diffusion-Based Scene Restoration: Amirhossein Kazerouni,

Maitreya Suin,

Tristan Aumentado-Armstrong,

Sina Honari,

Amanpreet Walia,

Iqbal Mohomed,

Konstantinos G. Derpanis,

Babak Taati,

Alex Levinshtein; [pdf] [supp]
[bibtex]
@InProceedings{Kazerouni_2026_CVPR,
  author    = {Kazerouni, Amirhossein and Suin, Maitreya and Aumentado-Armstrong, Tristan and Honari, Sina and Walia, Amanpreet and Mohomed, Iqbal and Derpanis, Konstantinos G. and Taati, Babak and Levinshtein, Alex},
  title     = {{Face2Scene}: Using Facial Degradation as an Oracle for Diffusion-Based Scene Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8428--8438},
}
3D Gaussian Splatting from Unposed Spike Stream: Yijia Guo,

Tong Hu,

Liwen Hu,

Lei Ma,

Tiejun Huang; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Yijia and Hu, Tong and Hu, Liwen and Ma, Lei and Huang, Tiejun},
  title     = {{3D} {Gaussian} Splatting from Unposed Spike Stream},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41002--41011},
}
Percept-WAM: Perception-Enhanced World-Awareness-Action Model for Robust End-to-End Autonomous Driving: Jianhua Han,

Meng Tian,

Jiangtong Zhu,

Fan He,

Huixin Zhang,

Sitong Guo,

Dechang Zhu,

Hao Tang,

Pei Xu,

Yuze Guo,

Minzhe Niu,

Haojie Zhu,

Qichao Dong,

Xuechao Yan,

Siyuan Dong,

Lu Hou,

Qingqiu Huang,

Xiaosong Jia,

Hang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Jianhua and Tian, Meng and Zhu, Jiangtong and He, Fan and Zhang, Huixin and Guo, Sitong and Zhu, Dechang and Tang, Hao and Xu, Pei and Guo, Yuze and Niu, Minzhe and Zhu, Haojie and Dong, Qichao and Yan, Xuechao and Dong, Siyuan and Hou, Lu and Huang, Qingqiu and Jia, Xiaosong and Xu, Hang},
  title     = {{Percept-WAM}: Perception-Enhanced World-Awareness-Action Model for Robust End-to-End Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10642--10655},
}
WaTeRFlow: Watermark Temporal Robustness via Flow Consistency: Utae Jeong,

Sumin In,

Hyunju Ryu,

Jaewan Choi,

Feng Yang,

Jongheon Jeong,

Seungryong Kim,

Sangpil Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Utae and In, Sumin and Ryu, Hyunju and Choi, Jaewan and Yang, Feng and Jeong, Jongheon and Kim, Seungryong and Kim, Sangpil},
  title     = {{WaTeRFlow}: Watermark Temporal Robustness via Flow Consistency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31703--31713},
}
Post-training Feature Pruning for Fundus Images Classification: Van-Nguyen Pham,

Duc-Tai Le,

Junghyun Bum,

Hyunseung Choo; [pdf] [supp]
[bibtex]
@InProceedings{Pham_2026_CVPR,
  author    = {Pham, Van-Nguyen and Le, Duc-Tai and Bum, Junghyun and Choo, Hyunseung},
  title     = {Post-training Feature Pruning for Fundus Images Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37590--37599},
}
LoL: Longer than Longer, Scaling Video Generation to Hour: Justin Cui,

Jie Wu,

Ming Li,

Tao Yang,

Xiaojie Li,

Rui Wang,

Andrew Bai,

Yuanhao Ban,

Cho-Jui Hsieh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Justin and Wu, Jie and Li, Ming and Yang, Tao and Li, Xiaojie and Wang, Rui and Bai, Andrew and Ban, Yuanhao and Hsieh, Cho-Jui},
  title     = {{LoL}: Longer than Longer, Scaling Video Generation to Hour},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38132--38142},
}
Bias at the End of the Score: Salma Abdel Magid,

Grace Guo,

Esin Tureci,

Amaya Dharmasiri,

Vikram V. Ramaswamy,

Hanspeter Pfister,

Olga Russakovsky; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Magid_2026_CVPR,
  author    = {Magid, Salma Abdel and Guo, Grace and Tureci, Esin and Dharmasiri, Amaya and Ramaswamy, Vikram V. and Pfister, Hanspeter and Russakovsky, Olga},
  title     = {Bias at the End of the Score},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24460--24470},
}
Neuro-Cognitive Reward Modeling for Human-Centered Autonomous Vehicle Control: Zhuoli Zhuang,

Yu-Cheng Chang,

Yu-Kai Wang,

Thomas Do,

Chin-Teng Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuang_2026_CVPR,
  author    = {Zhuang, Zhuoli and Chang, Yu-Cheng and Wang, Yu-Kai and Do, Thomas and Lin, Chin-Teng},
  title     = {Neuro-Cognitive Reward Modeling for Human-Centered Autonomous Vehicle Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10599--10609},
}
Markovian Scale Prediction: A New Era of Visual Autoregressive Generation: Yu Zhang,

Jingyi Liu,

Yiwei Shi,

Qi Zhang,

Duoqian Miao,

Changwei Wang,

Longbing Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yu and Liu, Jingyi and Shi, Yiwei and Zhang, Qi and Miao, Duoqian and Wang, Changwei and Cao, Longbing},
  title     = {Markovian Scale Prediction: A New Era of Visual Autoregressive Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41266--41277},
}
Similarity-as-Evidence: Calibrating Overconfident VLMs for Interpretable and Label-Efficient Medical Active Learning: Zhuofan Xie,

Zishan Lin,

Jinliang Lin,

Jie Qi,

Shaohua Hong,

Shuo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Zhuofan and Lin, Zishan and Lin, Jinliang and Qi, Jie and Hong, Shaohua and Li, Shuo},
  title     = {Similarity-as-Evidence: Calibrating Overconfident {VLMs} for Interpretable and Label-Efficient Medical Active Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20973--20984},
}
Ground Reaction Inertial Poser: Physics-based Human Motion Capture from Sparse IMUs and Insole Pressure Sensors: Ryosuke Hori,

Jyun-Ting Song,

Zhengyi Luo,

Jinkun Cao,

Soyong Shin,

Hideo Saito,

Kris Kitani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hori_2026_CVPR,
  author    = {Hori, Ryosuke and Song, Jyun-Ting and Luo, Zhengyi and Cao, Jinkun and Shin, Soyong and Saito, Hideo and Kitani, Kris},
  title     = {Ground Reaction Inertial Poser: Physics-based Human Motion Capture from Sparse {IMUs} and Insole Pressure Sensors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28435--28445},
}
See Through the Noise: Improving Domain Generalization in Gaze Estimation: Yanming Peng,

Shijing Wang,

Yaping Huang,

Yi Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Yanming and Wang, Shijing and Huang, Yaping and Tian, Yi},
  title     = {See Through the Noise: Improving Domain Generalization in Gaze Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31346--31355},
}
Monocular Open Vocabulary Occupancy Prediction for Indoor Scenes: Changqing Zhou,

Yueru Luo,

Han Zhang,

Zeyu Jiang,

Changhao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Changqing and Luo, Yueru and Zhang, Han and Jiang, Zeyu and Chen, Changhao},
  title     = {Monocular Open Vocabulary Occupancy Prediction for Indoor Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21627--21637},
}
Kaleidoscopic Scintillation Event Imaging: Alex Bocchieri,

John Mamish,

David Appleyard,

Andreas Velten; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bocchieri_2026_CVPR,
  author    = {Bocchieri, Alex and Mamish, John and Appleyard, David and Velten, Andreas},
  title     = {Kaleidoscopic Scintillation Event Imaging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19749--19758},
}
EvoGraph-R1: Self-Evolving Multimodal Knowledge Hypergraphs for Agentic Retrieval: Jiashi Lin,

Changhong Jiang,

Xiangru Lin,

Ruifei Zhang,

Xinyi Zhu,

Jiyao Liu,

Cheng Tang,

Ye Du,

Shujian Gao,

Junzhi Ning,

Lihao Liu,

Ziyan Huang,

Tianbin Li,

Jin Ye,

Junjun He; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Jiashi and Jiang, Changhong and Lin, Xiangru and Zhang, Ruifei and Zhu, Xinyi and Liu, Jiyao and Tang, Cheng and Du, Ye and Gao, Shujian and Ning, Junzhi and Liu, Lihao and Huang, Ziyan and Li, Tianbin and Ye, Jin and He, Junjun},
  title     = {{EvoGraph-R1}: Self-Evolving Multimodal Knowledge Hypergraphs for Agentic Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {756--765},
}
FHAvatar: Fast and High-Fidelity Reconstruction of Face-and-Hair Composable 3D Head Avatar from Few Casual Captures: Yujie Sun,

Zhuoqiang Cai,

Chaoyue Niu,

Jianchuan Chen,

Zhiwen Chen,

Chengfei Lv,

Fan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Yujie and Cai, Zhuoqiang and Niu, Chaoyue and Chen, Jianchuan and Chen, Zhiwen and Lv, Chengfei and Wu, Fan},
  title     = {{FHAvatar}: Fast and High-Fidelity Reconstruction of Face-and-Hair Composable {3D} Head Avatar from Few Casual Captures},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4132--4144},
}
Edge-RecViT: Efficient Vision Transformer via Semantic-Refined Dynamic Recursion: YiZhou Li,

Jinyi Xu,

Mingyu Yin,

Xianyi Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, YiZhou and Xu, Jinyi and Yin, Mingyu and Zhao, Xianyi},
  title     = {{Edge-RecViT}: Efficient Vision Transformer via Semantic-Refined Dynamic Recursion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12987--12996},
}
TVHighlights: LLM-Guided Human-Free Collaborative Training for Video Highlight Detection in Movies and TV Dramas: Qi Qiu,

Xuan Wu,

Jiawei Peng,

Yuan Miao,

Xu Yang,

Yanlong Du; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Qi and Wu, Xuan and Peng, Jiawei and Miao, Yuan and Yang, Xu and Du, Yanlong},
  title     = {{TVHighlights}: {LLM}-Guided Human-Free Collaborative Training for Video Highlight Detection in Movies and {TV} Dramas},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9773--9783},
}
RAAS: LLM Agentic System Architecture Search with GRPO: Jiayi Yang,

Guancheng Wan,

Man Zhang,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Jiayi and Wan, Guancheng and Zhang, Man and Ye, Mang},
  title     = {{RAAS}: {LLM} Agentic System Architecture Search with {GRPO}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34470--34479},
}
VGA-Bench: A Unified Benchmark and Multi-Model Framework for Video Aesthetics and Generation Quality Evaluation: Longteng Jiang,

DanDan Zheng,

Qianqian Qiao,

Heng Huang,

Huaye Wang,

Yihang Bo,

Bao Peng,

Jingdong Chen,

Jun Zhou,

Xin Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Longteng and Zheng, DanDan and Qiao, Qianqian and Huang, Heng and Wang, Huaye and Bo, Yihang and Peng, Bao and Chen, Jingdong and Zhou, Jun and Jin, Xin},
  title     = {{VGA-Bench}: A Unified Benchmark and Multi-Model Framework for Video Aesthetics and Generation Quality Evaluation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30457--30466},
}
BiPA: Bilevel Prompt Adaptation for Underwater Instance Segmentation: Long Ma,

Haoze Zheng,

Yuhang Mao,

Jinyuan Liu,

Chengpei Xu,

Xinwei Xue,

Yi Wang,

Xiangjian He,

Weimin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Long and Zheng, Haoze and Mao, Yuhang and Liu, Jinyuan and Xu, Chengpei and Xue, Xinwei and Wang, Yi and He, Xiangjian and Wang, Weimin},
  title     = {{BiPA}: Bilevel Prompt Adaptation for Underwater Instance Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10731--10740},
}
Beyond Success: Refining Elegant Robot Manipulation from Mixed-Quality Data via Just-in-Time Intervention: Yanbo Mao,

Jianlong Fu,

Ruoxuan Zhang,

Hongxia Xie,

Meibao Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Yanbo and Fu, Jianlong and Zhang, Ruoxuan and Xie, Hongxia and Yao, Meibao},
  title     = {Beyond Success: Refining Elegant Robot Manipulation from Mixed-Quality Data via Just-in-Time Intervention},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13508--13518},
}
GROW: Watermark Generation with Progressive Guidance for Diffusion Models: Pengcheng Luo,

Zexi Jia,

Yijia Zhong,

Jinchao Zhang,

Jie Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Pengcheng and Jia, Zexi and Zhong, Yijia and Zhang, Jinchao and Zhou, Jie},
  title     = {{GROW}: Watermark Generation with Progressive Guidance for Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35978--35987},
}
Select Less, Reason More: Prioritizing Evidence Purity for Video Reasoning: Xuchen Li,

Xuzhao Li,

Shiyu Hu,

Kaiqi Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xuchen and Li, Xuzhao and Hu, Shiyu and Huang, Kaiqi},
  title     = {Select Less, Reason More: Prioritizing Evidence Purity for Video Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25621--25632},
}
MultiShotMaster: A Controllable Multi-Shot Video Generation Framework: Qinghe Wang,

Xiaoyu Shi,

Baolu Li,

Weikang Bian,

Quande Liu,

Huchuan Lu,

Xintao Wang,

Pengfei Wan,

Kun Gai,

Xu Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Qinghe and Shi, Xiaoyu and Li, Baolu and Bian, Weikang and Liu, Quande and Lu, Huchuan and Wang, Xintao and Wan, Pengfei and Gai, Kun and Jia, Xu},
  title     = {{MultiShotMaster}: A Controllable Multi-Shot Video Generation Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16268--16278},
}
Cubic Discrete Diffusion: Discrete Visual Generation on High-Dimensional Representation Tokens: Yuqing Wang,

Chuofan Ma,

Zhijie Lin,

Yao Teng,

Lijun Yu,

Shuai Wang,

Jiaming Han,

Jiashi Feng,

Yi Jiang,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuqing and Ma, Chuofan and Lin, Zhijie and Teng, Yao and Yu, Lijun and Wang, Shuai and Han, Jiaming and Feng, Jiashi and Jiang, Yi and Liu, Xihui},
  title     = {Cubic Discrete Diffusion: Discrete Visual Generation on High-Dimensional Representation Tokens},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36072--36081},
}
NERFIFY: A Multi-Agent Framework for Turning NeRF Papers into Code: Seemandhar Jain,

Keshav Gupta,

Kunal Gupta,

Manmohan Chandraker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jain_2026_CVPR,
  author    = {Jain, Seemandhar and Gupta, Keshav and Gupta, Kunal and Chandraker, Manmohan},
  title     = {{NERFIFY}: A Multi-Agent Framework for Turning {NeRF} Papers into Code},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24384--24394},
}
Gated Condition Injection without Multimodal Attention: Towards Controllable Linear-Attention Transformers: Yuhe Liu,

Zhenxiong Tan,

Yujia Hu,

Songhua Liu,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuhe and Tan, Zhenxiong and Hu, Yujia and Liu, Songhua and Wang, Xinchao},
  title     = {Gated Condition Injection without Multimodal Attention: Towards Controllable Linear-Attention Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23388--23397},
}
Ego-InBetween: Generating Object State Transitions in Ego-Centric Videos: Mengmeng Ge,

Takashi Isobe,

Xu Jia,

Yanan Sun,

Zetong Yang,

Weinong Wang,

Dong Zhou,

Dong Li,

Huchuan Lu,

Emad Barsoum; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2026_CVPR,
  author    = {Ge, Mengmeng and Isobe, Takashi and Jia, Xu and Sun, Yanan and Yang, Zetong and Wang, Weinong and Zhou, Dong and Li, Dong and Lu, Huchuan and Barsoum, Emad},
  title     = {{Ego-InBetween}: Generating Object State Transitions in Ego-Centric Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43145--43154},
}
Unleashing the Power of Chain-of-Prediction for Monocular 3D Object Detection: Zhihao Zhang,

Abhinav Kumar,

Girish Chandar Ganesan,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zhihao and Kumar, Abhinav and Ganesan, Girish Chandar and Liu, Xiaoming},
  title     = {Unleashing the Power of Chain-of-Prediction for Monocular {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18755--18765},
}
RLFTSim: Realistic and Controllable Multi-Agent Traffic Simulation via Reinforcement Learning Fine-Tuning: Ehsan Ahmadi,

Hunter Schofield,

Behzad Khamidehi,

Fazel Arasteh,

Jinjun Shan,

Lili Mou,

Dongfeng Bai,

Kasra Rezaee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ahmadi_2026_CVPR,
  author    = {Ahmadi, Ehsan and Schofield, Hunter and Khamidehi, Behzad and Arasteh, Fazel and Shan, Jinjun and Mou, Lili and Bai, Dongfeng and Rezaee, Kasra},
  title     = {{RLFTSim}: Realistic and Controllable Multi-Agent Traffic Simulation via Reinforcement Learning Fine-Tuning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39734--39743},
}
Any4D: Unified Feed-Forward Metric 4D Reconstruction: Jay Karhade,

Nikhil Keetha,

Yuchen Zhang,

Tanisha Gupta,

Akash Sharma,

Sebastian Scherer,

Deva Ramanan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karhade_2026_CVPR,
  author    = {Karhade, Jay and Keetha, Nikhil and Zhang, Yuchen and Gupta, Tanisha and Sharma, Akash and Scherer, Sebastian and Ramanan, Deva},
  title     = {{Any4D}: Unified Feed-Forward Metric {4D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14578--14589},
}
It Takes Two: A Duet of Periodicity and Directionality for Burst Flicker Removal: Lishen Qu,

Shihao Zhou,

Jie Liang,

Hui Zeng,

Lei Zhang,

Jufeng Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Qu_2026_CVPR,
  author    = {Qu, Lishen and Zhou, Shihao and Liang, Jie and Zeng, Hui and Zhang, Lei and Yang, Jufeng},
  title     = {It Takes Two: A Duet of Periodicity and Directionality for Burst Flicker Removal},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15516--15527},
}
E-comIQ-ZH: A Human-Aligned Dataset and Benchmark for Fine-Grained Evaluation of E-commerce Posters with Chain-of-Thought: Meiqi Sun,

Mingyu Li,

Junxiong Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Meiqi and Li, Mingyu and Zhu, Junxiong},
  title     = {{E-comIQ-ZH}: A Human-Aligned Dataset and Benchmark for Fine-Grained Evaluation of E-commerce Posters with Chain-of-Thought},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30941--30951},
}
Progressive Mask Distillation for Self-supervised Video Representation: Kewei Wu,

Chong Liang,

Zhao Xie,

Dan Guo; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Kewei and Liang, Chong and Xie, Zhao and Guo, Dan},
  title     = {Progressive Mask Distillation for Self-supervised Video Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41677--41687},
}
GThinker: Towards General Multimodal Reasoning via Cue-Guided Rethinking: Yufei Zhan,

Ziheng Wu,

Yousong Zhu,

Rongkun Xue,

Guanghao Zhou,

Ruipu Luo,

Zhenghao Chen,

Can Zhang,

Yifan Li,

Zhentao He,

Zheming Yang,

Ming Tang,

Minghui Qiu,

Jinqiao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2026_CVPR,
  author    = {Zhan, Yufei and Wu, Ziheng and Zhu, Yousong and Xue, Rongkun and Zhou, Guanghao and Luo, Ruipu and Chen, Zhenghao and Zhang, Can and Li, Yifan and He, Zhentao and Yang, Zheming and Tang, Ming and Qiu, Minghui and Wang, Jinqiao},
  title     = {{GThinker}: Towards General Multimodal Reasoning via Cue-Guided Rethinking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11954--11965},
}
Physically Ground Commonsense Knowledge for Articulated Object Manipulation with Analytic Concepts: Jiude Wei,

Yuxuan Li,

Cewu Lu,

Jianhua Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Jiude and Li, Yuxuan and Lu, Cewu and Sun, Jianhua},
  title     = {Physically Ground Commonsense Knowledge for Articulated Object Manipulation with Analytic Concepts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13519--13528},
}
Computational Speckle Pattern Interferometry: Shengxi Wu,

Sophia Yang,

Dorian Chan,

Matthew O'Toole; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Shengxi and Yang, Sophia and Chan, Dorian and O'Toole, Matthew},
  title     = {Computational Speckle Pattern Interferometry},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41710--41719},
}
CADFS: A Big CAD Program Dataset and Framework for Computer-Aided Design with Large Language Models: Vladislav Pyatov,

Gleb Bobrovskikh,

Saveliy Galochkin,

Nikita Boldyrev,

Oleg Voynov,

Alexander Filippov,

Gonzalo Ferrer,

Peter Wonka,

Evgeny Burnaev; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pyatov_2026_CVPR,
  author    = {Pyatov, Vladislav and Bobrovskikh, Gleb and Galochkin, Saveliy and Boldyrev, Nikita and Voynov, Oleg and Filippov, Alexander and Ferrer, Gonzalo and Wonka, Peter and Burnaev, Evgeny},
  title     = {{CADFS}: A Big {CAD} Program Dataset and Framework for Computer-Aided Design with Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10176--10186},
}
Perceptual Neural Video Compression with Color Separation and Rank Chain: Xiongzhuang Liang,

Chuanbo Tang,

Zhuoyuan Li,

Li Li,

Dong Liu; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Xiongzhuang and Tang, Chuanbo and Li, Zhuoyuan and Li, Li and Liu, Dong},
  title     = {Perceptual Neural Video Compression with Color Separation and Rank Chain},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5348--5358},
}
OS-Fed: One Snapshot Is All You Need: Xuwei Qian,

Jinghui Zhang,

Yuchuan Tan,

Wenbo Huang,

Zhen Wu,

Shen Zhou,

LiSha Gao,

Ding Ding,

Fang Dong; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Xuwei and Zhang, Jinghui and Tan, Yuchuan and Huang, Wenbo and Wu, Zhen and Zhou, Shen and Gao, LiSha and Ding, Ding and Dong, Fang},
  title     = {{OS-Fed}: One Snapshot Is All You Need},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31758--31768},
}
BackSplit: The Importance of Sub-dividing the Background in Biomedical Lesion Segmentation: Rachit Saluja,

Asli Cihangir,

Ruining Deng,

Johannes C. Paetzold,

Fengbei Liu,

Mert R. Sabuncu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saluja_2026_CVPR,
  author    = {Saluja, Rachit and Cihangir, Asli and Deng, Ruining and Paetzold, Johannes C. and Liu, Fengbei and Sabuncu, Mert R.},
  title     = {{BackSplit}: The Importance of Sub-dividing the Background in Biomedical Lesion Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8492--8502},
}
Your Classifier Can Do More: Towards Balancing the Gaps in Classification, Robustness, and Generation: Kaichao Jiang,

He Wang,

Xiaoshuai Hao,

Xiulong Yang,

Ajian Liu,

Qi Chu,

Yunfeng Diao,

Richang Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Kaichao and Wang, He and Hao, Xiaoshuai and Yang, Xiulong and Liu, Ajian and Chu, Qi and Diao, Yunfeng and Hong, Richang},
  title     = {Your Classifier Can Do More: Towards Balancing the Gaps in Classification, Robustness, and Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42310--42320},
}
FlashDecoder: Real-Time Latent-to-Pixel Streaming Decoder with Transformers: Minguk Kang,

Suha Kwak; [pdf] [supp]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Minguk and Kwak, Suha},
  title     = {{FlashDecoder}: Real-Time Latent-to-Pixel Streaming Decoder with Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5294--5305},
}
DiffuView: Multi-View Diffusion Pretraining for 3D Aware Robotic Manipulation: Kaizhao Zhang,

Tian Niu,

Tianyu Liu,

Chenen Guo,

Zijun Xu,

Qingda Hu,

Wenchao Ding; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Kaizhao and Niu, Tian and Liu, Tianyu and Guo, Chenen and Xu, Zijun and Hu, Qingda and Ding, Wenchao},
  title     = {{DiffuView}: Multi-View Diffusion Pretraining for {3D} Aware Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23601--23611},
}
FlowSteer: Guiding Few-Step Image Synthesis with Authentic Trajectories: Lei Ke,

Hubery Yin,

Gongye Liu,

Zhengyao Lv,

Jingcai Guo,

Chen Li,

Wenhan Luo,

Yujiu Yang,

Jing Lyu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2026_CVPR,
  author    = {Ke, Lei and Yin, Hubery and Liu, Gongye and Lv, Zhengyao and Guo, Jingcai and Li, Chen and Luo, Wenhan and Yang, Yujiu and Lyu, Jing},
  title     = {{FlowSteer}: Guiding Few-Step Image Synthesis with Authentic Trajectories},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30381--30390},
}
SyncMos: Scalable Motion Synchronisation for Multi-Agent Scene Interaction: Lingxiao Li,

Dongwon Kim,

Lingyan Ruan,

Bin Chen,

Taesoo Kwon,

Taehyun Rhee; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author = {Li, Lingxiao and Kim, Dongwon and Ruan, Lingyan and Chen, Bin and Kwon, Taesoo and Rhee, Taehyun},
  title = {SyncMos: Scalable Motion Synchronisation for Multi-Agent Scene Interaction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {8174--8182}
}
Cross-Architecture Adaptation: Cloud-Edge Continual Test-Time Adaptation with Dynamic Sampling and Heterogeneous Distillation: Zirui Xu,

Xianhang Chu,

Jiahao Li,

Xu Yang,

Cheng Deng; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author = {Xu, Zirui and Chu, Xianhang and Li, Jiahao and Yang, Xu and Deng, Cheng},
  title = {Cross-Architecture Adaptation: Cloud-Edge Continual Test-Time Adaptation with Dynamic Sampling and Heterogeneous Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {39901--39910}
}
UZ3DVG: Unaided Zero-Shot 3D Visual Grounding with Generated Language Conditions: Wenbin Tan,

Jiawen Lin,

Yuan Xie,

Yachao Zhang,

Yanyun Qu; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author = {Tan, Wenbin and Lin, Jiawen and Xie, Yuan and Zhang, Yachao and Qu, Yanyun},
  title = {UZ3DVG: Unaided Zero-Shot 3D Visual Grounding with Generated Language Conditions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {9547--9557}
}
Learning from Noisy Supervision: A Denoising-Debiasing Framework for Weakly Supervised Video Anomaly Detection: Yaxin Zhao,

Yang Wang,

Wenya Guo,

Sihan Xu,

Xiangrui Cai,

Xi Lin,

Ying Zhang,

Xiaojie Yuan; [pdf]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author = {Zhao, Yaxin and Wang, Yang and Guo, Wenya and Xu, Sihan and Cai, Xiangrui and Lin, Xi and Zhang, Ying and Yuan, Xiaojie},
  title = {Learning from Noisy Supervision: A Denoising-Debiasing Framework for Weakly Supervised Video Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {21326--21335}
}
CoLoR: The Devil is in Scene Coordinate Regression for Large-Scale Visual Localization: Xindong Mao,

Hang Li,

Yuchen Wu,

Jiahe Li,

Xiao Bai,

Jin Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author = {Mao, Xindong and Li, Hang and Wu, Yuchen and Li, Jiahe and Bai, Xiao and Zheng, Jin},
  title = {CoLoR: The Devil is in Scene Coordinate Regression for Large-Scale Visual Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {12206--12216}
}
Focus, Don't Prune: Identifying Instruction-Relevant Regions for Information-Rich Image Understanding: Mincheol Kwon,

Minseung Lee,

Seonga Choi,

Miso Choi,

Kyeongjin Oh,

Hyunyoung Lee,

Cheonyoung Park,

Yongho Song,

Seunghyun Park,

Jinkyu Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kwon_2026_CVPR,
  author = {Kwon, Mincheol and Lee, Minseung and Choi, Seonga and Choi, Miso and Oh, Kyeongjin and Lee, Hyunyoung and Park, Cheonyoung and Song, Yongho and Park, Seunghyun and Kim, Jinkyu},
  title = {Focus, Don't Prune: Identifying Instruction-Relevant Regions for Information-Rich Image Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {31900--31909}
}
Revisiting Pose Sensitivity in Splat-based Computed Tomography under Sparse-view Reconstruction: Kiseok Choi,

Hyeongjun Cho,

Inchul Kim,

Min H. Kim; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author = {Choi, Kiseok and Cho, Hyeongjun and Kim, Inchul and Kim, Min H.},
  title = {Revisiting Pose Sensitivity in Splat-based Computed Tomography under Sparse-view Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {25969--25978}
}
Real2Edit2Real: Generating Robotic Demonstrations via a 3D Control Interface: Yujie Zhao,

Hongwei Fan,

Di Chen,

Shengcong Chen,

Liliang Chen,

Xiaoqi Li,

Guanghui Ren,

Hao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author = {Zhao, Yujie and Fan, Hongwei and Chen, Di and Chen, Shengcong and Chen, Liliang and Li, Xiaoqi and Ren, Guanghui and Dong, Hao},
  title = {Real2Edit2Real: Generating Robotic Demonstrations via a 3D Control Interface},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {23106--23116}
}
Chart-FR1: Visual Focus-Driven Fine-Grained Reasoning on Dense Charts: Hongkun Pan,

Yuwei Wu,

Wanyi Hong,

Shenghui Hu,

Qitong Yan,

Yi Yang,

Rufei Han,

Changju Zhou,

Minfeng Zhu,

Dongming Han,

Wei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author = {Pan, Hongkun and Wu, Yuwei and Hong, Wanyi and Hu, Shenghui and Yan, Qitong and Yang, Yi and Han, Rufei and Zhou, Changju and Zhu, Minfeng and Han, Dongming and Chen, Wei},
  title = {Chart-FR1: Visual Focus-Driven Fine-Grained Reasoning on Dense Charts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {26285--26294}
}
$\alpha$Matte4K & $\mu$Matting: Dataset and Model for Ultra-Micro Precision Alpha Video Matting: Xinyi Chen,

Hang Dong,

Baowei Jiang,

Shenkun Xu,

Youqi Guan,

Kanle Shi,

Kun Gai,

Haichuan Song; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author = {Chen, Xinyi and Dong, Hang and Jiang, Baowei and Xu, Shenkun and Guan, Youqi and Shi, Kanle and Gai, Kun and Song, Haichuan},
  title = {{$\alpha$}Matte4K \& {$\mu$}Matting: Dataset and Model for Ultra-Micro Precision Alpha Video Matting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {12491--12500}
}
Your One-Stop Solution for AI-Generated Video Detection: Long Ma,

Zihao Xue,

Yan Wang,

Zhiyuan Yan,

Jin Xu,

Xiaorui Jiang,

Haiyang Yu,

Yong Liao,

Zhen Bi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author = {Ma, Long and Xue, Zihao and Wang, Yan and Yan, Zhiyuan and Xu, Jin and Jiang, Xiaorui and Yu, Haiyang and Liao, Yong and Bi, Zhen},
  title = {Your One-Stop Solution for AI-Generated Video Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {4458--4470}
}
Breaking Multimodal LLM Safety via Video-Driven Prompting: Dong Wang,

Xiangyu He,

Xinqi Lyu,

Bin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author = {Wang, Dong and He, Xiangyu and Lyu, Xinqi and Xiao, Bin},
  title = {Breaking Multimodal LLM Safety via Video-Driven Prompting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {8566--8576}
}
Common Inpainted Objects In-N-Out of Context: Tianze Yang,

Tyson Jordan,

Ruitong Sun,

Ninghao Liu,

Jin Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author = {Yang, Tianze and Jordan, Tyson and Sun, Ruitong and Liu, Ninghao and Sun, Jin},
  title = {Common Inpainted Objects In-N-Out of Context},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {13069--13079}
}
U4D: Uncertainty-Aware 4D World Modeling from LiDAR Sequences: Xiang Xu,

Alan Liang,

Youquan Liu,

Linfeng Li,

Lingdong Kong,

Ziwei Liu,

Qingshan Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author = {Xu, Xiang and Liang, Alan and Liu, Youquan and Li, Linfeng and Kong, Lingdong and Liu, Ziwei and Liu, Qingshan},
  title = {U4D: Uncertainty-Aware 4D World Modeling from LiDAR Sequences},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {10027--10039}
}
Disentangle-then-Align: Non-Iterative Hybrid Multimodal Image Registration via Cross-Scale Feature Disentanglement: Chunlei Zhang,

Jiahao Xia,

Yun Xiao,

Bo Jiang,

Jian Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author = {Zhang, Chunlei and Xia, Jiahao and Xiao, Yun and Jiang, Bo and Zhang, Jian},
  title = {Disentangle-then-Align: Non-Iterative Hybrid Multimodal Image Registration via Cross-Scale Feature Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {15912--15921}
}
UniLDiff: Unlocking the Power of Diffusion Priors for All-in-One Image Restoration: Zihan Cheng,

Liangtai Zhou,

Dian Chen,

Ni Tang,

Xiaotong Luo,

Yuan Xie,

Yanyun Qu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author = {Cheng, Zihan and Zhou, Liangtai and Chen, Dian and Tang, Ni and Luo, Xiaotong and Xie, Yuan and Qu, Yanyun},
  title = {UniLDiff: Unlocking the Power of Diffusion Priors for All-in-One Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {37465--37475}
}
Eliminate Distance Differences Induced by Backdoor Attacks: Layer-Selective Training and Clipping to Mask Backdoor Models: Xuzeng Li,

Tao Zhang,

Xiangyun Tang,

Jiacheng Wang,

Jian Wang,

Jiawen Kang,

Jiqiang Liu,

Zhen Han,

Dusit Niyato,

Dong In Kim; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author = {Li, Xuzeng and Zhang, Tao and Tang, Xiangyun and Wang, Jiacheng and Wang, Jian and Kang, Jiawen and Liu, Jiqiang and Han, Zhen and Niyato, Dusit and Kim, Dong In},
  title = {Eliminate Distance Differences Induced by Backdoor Attacks: Layer-Selective Training and Clipping to Mask Backdoor Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {13336--13345}
}
IntroSVG: Learning from Rendering Feedback for Text-to-SVG Generation via an Introspective Generator-Critic Framework: Feiyu Wang,

Jiayuan Yang,

Zhiyuan Zhao,

Da Zhang,

Bingyu Li,

Peng Liu,

Junyu Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author = {Wang, Feiyu and Yang, Jiayuan and Zhao, Zhiyuan and Zhang, Da and Li, Bingyu and Liu, Peng and Gao, Junyu},
  title = {IntroSVG: Learning from Rendering Feedback for Text-to-SVG Generation via an Introspective Generator-Critic Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {615--625}
}
Progressive Cross-Modal Causal Intervention for Long-Term Action Recognition: Shaowu Xu,

Xibin Jia,

Chao Fan,

Junyu Gao,

Jing Chang,

Qianmei Sun; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author = {Xu, Shaowu and Jia, Xibin and Fan, Chao and Gao, Junyu and Chang, Jing and Sun, Qianmei},
  title = {Progressive Cross-Modal Causal Intervention for Long-Term Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {31186--31195}
}
Learning What to Trust: Bayesian Prior-Guided Optimization for Visual Generation: Ruiying Liu,

Yuanzhi Liang,

Haibin Huang,

Tianshu Yu,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author = {Liu, Ruiying and Liang, Yuanzhi and Huang, Haibin and Yu, Tianshu and Zhang, Chi},
  title = {Learning What to Trust: Bayesian Prior-Guided Optimization for Visual Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {34408--34417}
}
BHCast: Unlocking Black Hole Plasma Dynamics from a Single Blurry Image with Long-Term Forecasting: Renbo Tu,

Ali SaraerToosi,

Nicholas S. Conroy,

Gennady Pekhimenko,

Aviad Levis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tu_2026_CVPR,
  author = {Tu, Renbo and SaraerToosi, Ali and Conroy, Nicholas S. and Pekhimenko, Gennady and Levis, Aviad},
  title = {BHCast: Unlocking Black Hole Plasma Dynamics from a Single Blurry Image with Long-Term Forecasting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {5606--5616}
}
BinaryAttention: One-Bit QK-Attention for Vision and Diffusion Transformers: Chaodong Xiao,

Zhengqiang Zhang,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author = {Xiao, Chaodong and Zhang, Zhengqiang and Zhang, Lei},
  title = {BinaryAttention: One-Bit QK-Attention for Vision and Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {12106--12117}
}
FlowPortal: Residual-Corrected Flow for Training-Free Video Relighting and Background Replacement: Wenshuo Gao,

Junyi Fan,

Jiangyue Zeng,

Shuai Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author = {Gao, Wenshuo and Fan, Junyi and Zeng, Jiangyue and Yang, Shuai},
  title = {FlowPortal: Residual-Corrected Flow for Training-Free Video Relighting and Background Replacement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {2025--2034}
}
ENC-Bench: A Benchmark for Evaluating Multimodal Large Language Models in Electronic Navigational Chart Understanding: Ao Cheng,

Xingming Li,

Xuanyu Ji,

Xixiang He,

Qiyao Sun,

Chunping Qiu,

Runke Huang,

Qingyong Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author = {Cheng, Ao and Li, Xingming and Ji, Xuanyu and He, Xixiang and Sun, Qiyao and Qiu, Chunping and Huang, Runke and Hu, Qingyong},
  title = {ENC-Bench: A Benchmark for Evaluating Multimodal Large Language Models in Electronic Navigational Chart Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {2423--2433}
}
STCDiT: Spatio-Temporally Consistent Diffusion Transformer for High-Quality Video Super-Resolution: Junyang Chen,

Jiangxin Dong,

Long Sun,

Yixin Yang,

Jinshan Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author = {Chen, Junyang and Dong, Jiangxin and Sun, Long and Yang, Yixin and Pan, Jinshan},
  title = {STCDiT: Spatio-Temporally Consistent Diffusion Transformer for High-Quality Video Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {38281--38290}
}
BD-Merging: Bias-Aware Dynamic Model Merging with Evidence-Guided Contrastive Learning: Yuhan Xie,

Chen Lyu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author = {Xie, Yuhan and Lyu, Chen},
  title = {BD-Merging: Bias-Aware Dynamic Model Merging with Evidence-Guided Contrastive Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {12892--12901}
}
OpenFS: Multi-Hand-Capable Fingerspelling Recognition with Implicit Signing-Hand Detection and Frame-Wise Letter-Conditioned Synthesis: Junuk Cha,

Jihyeon Kim,

Han-Mu Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cha_2026_CVPR,
  author = {Cha, Junuk and Kim, Jihyeon and Park, Han-Mu},
  title = {OpenFS: Multi-Hand-Capable Fingerspelling Recognition with Implicit Signing-Hand Detection and Frame-Wise Letter-Conditioned Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {30707--30717}
}
TLMA: Mitigating the Impact of Weakly Labeled Information for Video Anomaly Detection: Rong Xu,

Runqi Wang,

Yingjun Zhang,

Tao Tao,

Xiaomeng Li,

Liping Jing; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author = {Xu, Rong and Wang, Runqi and Zhang, Yingjun and Tao, Tao and Li, Xiaomeng and Jing, Liping},
  title = {TLMA: Mitigating the Impact of Weakly Labeled Information for Video Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {35597--35606}
}
PromptDepth: Efficient and Promptable Geometric 3D Vision Model for Embodied Intelligence: Xianyun Wang,

Jiaxu Miao,

Tian Xu,

Siyuan Wang,

Yuehao Li,

Haoyang Hu,

Jun Xiao,

Yonghong Tian,

Jun Yu; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author = {Wang, Xianyun and Miao, Jiaxu and Xu, Tian and Wang, Siyuan and Li, Yuehao and Hu, Haoyang and Xiao, Jun and Tian, Yonghong and Yu, Jun},
  title = {PromptDepth: Efficient and Promptable Geometric 3D Vision Model for Embodied Intelligence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {28074--28085}
}
SounDiT: Geo-Contextual Soundscape-to-Landscape Generation: Junbo Wang,

Haofeng Tan,

Bowen Liao,

Albert Jiang,

Teng Fei,

Qixing Huang,

Bing Zhou,

Zhengzhong Tu,

Shan Ye,

Yuhao Kang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author = {Wang, Junbo and Tan, Haofeng and Liao, Bowen and Jiang, Albert and Fei, Teng and Huang, Qixing and Zhou, Bing and Tu, Zhengzhong and Ye, Shan and Kang, Yuhao},
  title = {SounDiT: Geo-Contextual Soundscape-to-Landscape Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {32659--32670}
}
One Model, Many Budgets: Elastic Latent Interfaces for Diffusion Transformers: Moayed Haji-Ali,

Willi Menapace,

Ivan Skorokhodov,

Dogyun Park,

Anil Kag,

Michael Vasilkovsky,

Sergey Tulyakov,

Vicente Ordonez,

Aliaksandr Siarohin; [pdf] [supp]
[bibtex]
@InProceedings{Haji-Ali_2026_CVPR,
  author = {Haji-Ali, Moayed and Menapace, Willi and Skorokhodov, Ivan and Park, Dogyun and Kag, Anil and Vasilkovsky, Michael and Tulyakov, Sergey and Ordonez, Vicente and Siarohin, Aliaksandr},
  title = {One Model, Many Budgets: Elastic Latent Interfaces for Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {4558--4568}
}
OmniSonic: Towards Universal and Holistic Audio Generation from Video and Text: Weiguo Pian,

Saksham Singh Kushwaha,

Zhimin Chen,

Shijian Deng,

Kai Wang,

Yunhui Guo,

Yapeng Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pian_2026_CVPR,
  author = {Pian, Weiguo and Kushwaha, Saksham Singh and Chen, Zhimin and Deng, Shijian and Wang, Kai and Guo, Yunhui and Tian, Yapeng},
  title = {OmniSonic: Towards Universal and Holistic Audio Generation from Video and Text},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {540--549}
}
Residual Decoding: Mitigating Hallucinations in Large Vision-Language Models via History-Aware Residual Guidance: Xinrong Chen,

Xu Chu,

Yingmin Qiu,

Hengyuan Zhang,

Jing Xiong,

Shiyu Tang,

Shuai Liu,

Shaokang Yang,

Cheng Yang,

Hayden Kwok-Hay So,

Ngai Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author = {Chen, Xinrong and Chu, Xu and Qiu, Yingmin and Zhang, Hengyuan and Xiong, Jing and Tang, Shiyu and Liu, Shuai and Yang, Shaokang and Yang, Cheng and So, Hayden Kwok-Hay and Wong, Ngai},
  title = {Residual Decoding: Mitigating Hallucinations in Large Vision-Language Models via History-Aware Residual Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {25281--25292}
}
SAR2Net: Learning Spatially Anchored Representations for Retrieval-Guided Cross-Stain Alignment: Tianle Shen,

Fang Yan,

Xiaofan Zhang; [pdf]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author = {Shen, Tianle and Yan, Fang and Zhang, Xiaofan},
  title = {SAR2Net: Learning Spatially Anchored Representations for Retrieval-Guided Cross-Stain Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {12544--12553}
}
PoInit-of-View: Poisoning Initialization of Views Transfers Across Multiple 3D Reconstruction Systems: Weijie Wang,

Songlong Xing,

Zhengyu Zhao,

Nicu Sebe,

Bruno Lepri; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author = {Wang, Weijie and Xing, Songlong and Zhao, Zhengyu and Sebe, Nicu and Lepri, Bruno},
  title = {PoInit-of-View: Poisoning Initialization of Views Transfers Across Multiple 3D Reconstruction Systems},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {20670--20679}
}
CARE: A Molecular-Guided Foundation Model with Adaptive Region Modeling for Whole Slide Image Analysis: Di Zhang,

Zhangpeng Gong,

Xiaobo Pang,

Jiashuai Liu,

Junbo Lu,

Hao Cui,

Jiusong Ge,

Zhi Zeng,

Kai Yi,

Yinghua Li,

Si Liu,

Tingsong Yu,

Haoran Wang,

Mireia Crispin-Ortuzar,

Weimiao Yu,

Chen Li,

Zeyu Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author = {Zhang, Di and Gong, Zhangpeng and Pang, Xiaobo and Liu, Jiashuai and Lu, Junbo and Cui, Hao and Ge, Jiusong and Zeng, Zhi and Yi, Kai and Li, Yinghua and Liu, Si and Yu, Tingsong and Wang, Haoran and Crispin-Ortuzar, Mireia and Yu, Weimiao and Li, Chen and Gao, Zeyu},
  title = {CARE: A Molecular-Guided Foundation Model with Adaptive Region Modeling for Whole Slide Image Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {21078--21088}
}
A2GC: Asymmetric Aggregation with Geometric Constraints for Locally Aggregated Descriptors: Zhenyu Li,

Tianyi Shang; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author = {Li, Zhenyu and Shang, Tianyi},
  title = {A2GC: Asymmetric Aggregation with Geometric Constraints for Locally Aggregated Descriptors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {19423--19431}
}
Decoupling Stability and Plasticity for Multi-Modal Test-Time Adaptation: Yongbo He,

Zirun Guo,

Tao Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author = {He, Yongbo and Guo, Zirun and Jin, Tao},
  title = {Decoupling Stability and Plasticity for Multi-Modal Test-Time Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {15020--15029}
}
ActiveVLA: Injecting Active Perception into Vision-Language-Action Models for Precise 3D Robotic Manipulation: Zhenyang Liu,

Yongchong Gu,

Yikai Wang,

Xiangyang Xue,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author = {Liu, Zhenyang and Gu, Yongchong and Wang, Yikai and Xue, Xiangyang and Fu, Yanwei},
  title = {ActiveVLA: Injecting Active Perception into Vision-Language-Action Models for Precise 3D Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {8141--8151}
}
MaskAdapt: Learning Flexible Motion Adaptation via Mask-Invariant Prior for Physics-Based Characters: Soomin Park,

Eunseong Lee,

Kwang Bin Lee,

Sung-Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author = {Park, Soomin and Lee, Eunseong and Lee, Kwang Bin and Lee, Sung-Hee},
  title = {MaskAdapt: Learning Flexible Motion Adaptation via Mask-Invariant Prior for Physics-Based Characters},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {2285--2294}
}
SeeU: Seeing the Unseen World via 4D Dynamics-aware Generation: Yu Yuan,

Tharindu Wickremasinghe,

Zeeshan Nadir,

Xijun Wang,

Yiheng Chi,

Stanley H. Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author = {Yuan, Yu and Wickremasinghe, Tharindu and Nadir, Zeeshan and Wang, Xijun and Chi, Yiheng and Chan, Stanley H.},
  title = {SeeU: Seeing the Unseen World via 4D Dynamics-aware Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {11150--11162}
}
MedLoc-R1: Performance-Aware Curriculum Reward Scheduling for GRPO-Based Medical Visual Grounding: Guangjing Yang,

Ziyuan Qin,

Chaoran Zhang,

Chenlin Du,

Jinglin Wang,

Wanran Sun,

Zhenyu Zhang,

Bing Ji,

Qicheng Lao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author = {Yang, Guangjing and Qin, Ziyuan and Zhang, Chaoran and Du, Chenlin and Wang, Jinglin and Sun, Wanran and Zhang, Zhenyu and Ji, Bing and Lao, Qicheng},
  title = {MedLoc-R1: Performance-Aware Curriculum Reward Scheduling for GRPO-Based Medical Visual Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {21036--21045}
}
REVISOR: Beyond Textual Reflection, Towards Multimodal Introspective Reasoning in Long-Form Video Understanding: Jiaze Li,

Hao Yin,

Wenhui Tan,

Jingyang Chen,

Boshen Xu,

Yuxun Qu,

Yijing Chen,

Jianzhong Ju,

Zhenbo Luo,

Jian Luan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author = {Li, Jiaze and Yin, Hao and Tan, Wenhui and Chen, Jingyang and Xu, Boshen and Qu, Yuxun and Chen, Yijing and Ju, Jianzhong and Luo, Zhenbo and Luan, Jian},
  title = {REVISOR: Beyond Textual Reflection, Towards Multimodal Introspective Reasoning in Long-Form Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {5059--5069}
}
Hierarchical Concept Embedding & Pursuit for Interpretable Image Classification: Nghia Nguyen,

Tianjiao Ding,

René Vidal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author = {Nguyen, Nghia and Ding, Tianjiao and Vidal, Ren{\'e}},
  title = {Hierarchical Concept Embedding \& Pursuit for Interpretable Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {2907--2917}
}
One Layer's Trash is Another Layer's Treasure: Adaptive Layer-wise Visual Token Selection in LVLMs: Yongru Chen,

Kai Zhang,

Zeliang Zong,

Yuchen Lu,

Wenming Tan,

Ye Ren,

Jilin Hu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author = {Chen, Yongru and Zhang, Kai and Zong, Zeliang and Lu, Yuchen and Tan, Wenming and Ren, Ye and Hu, Jilin},
  title = {One Layer's Trash is Another Layer's Treasure: Adaptive Layer-wise Visual Token Selection in LVLMs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {17672--17681}
}
R2-Seg: Training-Free OOD Medical Tumor Segmentation via Anatomical Reasoning and Statistical Rejection: Shuaike Shen,

Ke Liu,

Jiaqing Xie,

Shangde Gao,

Chunhua Shen,

Ge Liu,

Mireia Crispin-Ortuzar,

Shangqi Gao; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author = {Shen, Shuaike and Liu, Ke and Xie, Jiaqing and Gao, Shangde and Shen, Chunhua and Liu, Ge and Crispin-Ortuzar, Mireia and Gao, Shangqi},
  title = {R2-Seg: Training-Free OOD Medical Tumor Segmentation via Anatomical Reasoning and Statistical Rejection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {21669--21678}
}
Bias In, Bias Out? Finding Unbiased Subnetworks in Vanilla Models: Ivan Luiz De Moura Matos,

Abdel Djalil Sad Saoud,

Ekaterina Iakovleva,

Vito Paolo Pastore,

Enzo Tartaglione; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{De_Moura_Matos_2026_CVPR,
  author = {De Moura Matos, Ivan Luiz and Saoud, Abdel Djalil Sad and Iakovleva, Ekaterina and Pastore, Vito Paolo and Tartaglione, Enzo},
  title = {Bias In, Bias Out? Finding Unbiased Subnetworks in Vanilla Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {3294--3305}
}
RecEdit-Drive: 3D Reconstruction-Guided Spatiotemporal Video Editing for Autonomous Driving Scenes: Yipeng Wu,

Xin Wang,

Chenghan Yang,

Chong Wang,

Dongdong Wu,

Wanchao Su,

Hengshuang Zhao,

Wei Feng,

Kairui Yang,

Di Lin; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author = {Wu, Yipeng and Wang, Xin and Yang, Chenghan and Wang, Chong and Wu, Dongdong and Su, Wanchao and Zhao, Hengshuang and Feng, Wei and Yang, Kairui and Lin, Di},
  title = {RecEdit-Drive: 3D Reconstruction-Guided Spatiotemporal Video Editing for Autonomous Driving Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {25415--25425}
}
Particulate: Feed-Forward 3D Object Articulation: Ruining Li,

Yuxin Yao,

Chuanxia Zheng,

Christian Rupprecht,

Joan Lasenby,

Shangzhe Wu,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author = {Li, Ruining and Yao, Yuxin and Zheng, Chuanxia and Rupprecht, Christian and Lasenby, Joan and Wu, Shangzhe and Vedaldi, Andrea},
  title = {Particulate: Feed-Forward 3D Object Articulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {27708--27718}
}
EXOTIC: External Vision-driven Incomplete Multi-view Classification: Shilin Xu,

Dezhong Peng,

Zhenwen Ren,

Yuan Sun; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author = {Xu, Shilin and Peng, Dezhong and Ren, Zhenwen and Sun, Yuan},
  title = {EXOTIC: External Vision-driven Incomplete Multi-view Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {30216--30225}
}
ConceptPrism: Concept Disentanglement in Personalized Diffusion Models via Residual Token Optimization: Minseo Kim,

Minchan Kwon,

Dongyeun Lee,

Yunho Jeon,

Junmo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author = {Kim, Minseo and Kwon, Minchan and Lee, Dongyeun and Jeon, Yunho and Kim, Junmo},
  title = {ConceptPrism: Concept Disentanglement in Personalized Diffusion Models via Residual Token Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {2381--2390}
}
Video2Robo: 3DGS-based Synthetic Data from One Video Enables Scalable Robot Learning: Yinan Deng,

Kejia Hu,

Ye Chen,

Jianyu Dou,

Jiahui Wang,

Jingyu Zhao,

Haojia Ao,

Yi Yang,

Yufeng Yue; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author = {Deng, Yinan and Hu, Kejia and Chen, Ye and Dou, Jianyu and Wang, Jiahui and Zhao, Jingyu and Ao, Haojia and Yang, Yi and Yue, Yufeng},
  title = {Video2Robo: 3DGS-based Synthetic Data from One Video Enables Scalable Robot Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month = jun,
  year = {2026},
  pages = {6695--6705}
}
Attack for Defense: Adversarial Agents for Point Prompt Optimization Empowering Segment Anything Model: Xueyu Liu,

Xiaoyi Zhang,

Meilin Liu,

Guangze Shi,

Jia Shen,

Yujie Wang,

Cai Zhao,

Ziyuan He,

Yongfei Wu,

Mingqiang Wei,

Yongle Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xueyu and Zhang, Xiaoyi and Liu, Meilin and Shi, Guangze and Shen, Jia and Wang, Yujie and Zhao, Cai and He, Ziyuan and Wu, Yongfei and Wei, Mingqiang and Chen, Yongle},
  title     = {Attack for Defense: Adversarial Agents for Point Prompt Optimization Empowering {Segment Anything Model}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6591--6600},
}
Revisiting Learning with Noisy Labels: Active Forgetting and Noise Suppression: Mengmeng Sheng,

Zeren Sun,

Tao Chen,

Jinshan Pan,

Yazhou Yao,

Fumin Shen; [pdf]
[bibtex]
@InProceedings{Sheng_2026_CVPR,
  author    = {Sheng, Mengmeng and Sun, Zeren and Chen, Tao and Pan, Jinshan and Yao, Yazhou and Shen, Fumin},
  title     = {Revisiting Learning with Noisy Labels: Active Forgetting and Noise Suppression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24792--24802},
}
FINER: MLLMs Hallucinate under Fine-grained Negative Queries: Rui Xiao,

Sanghwan Kim,

Yongqin Xian,

Zeynep Akata,

Stephan Alaniz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Rui and Kim, Sanghwan and Xian, Yongqin and Akata, Zeynep and Alaniz, Stephan},
  title     = {{FINER}: {MLLMs} Hallucinate under Fine-grained Negative Queries},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36235--36244},
}
Visual Prototype Conditioned Focal Region Generation for UAV-Based Object Detection: Wenhao Li,

Zimeng Wu,

Yu Wu,

Zehua Fu,

Jiaxin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Wenhao and Wu, Zimeng and Wu, Yu and Fu, Zehua and Chen, Jiaxin},
  title     = {Visual Prototype Conditioned Focal Region Generation for {UAV}-Based Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3772--3782},
}
MoRel: Long-Range Flicker-Free 4D Motion Modeling via Anchor Relay-based Bidirectional Blending with Hierarchical Densification: Sangwoon Kwak,

Weeyoung Kwon,

Jun Young Jeong,

Geonho Kim,

Won-Sik Cheong,

Jihyong Oh; [pdf] [supp]
[bibtex]
@InProceedings{Kwak_2026_CVPR,
  author    = {Kwak, Sangwoon and Kwon, Weeyoung and Jeong, Jun Young and Kim, Geonho and Cheong, Won-Sik and Oh, Jihyong},
  title     = {{MoRel}: Long-Range Flicker-Free {4D} Motion Modeling via Anchor Relay-based Bidirectional Blending with Hierarchical Densification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37370--37379},
}
Multi-Scale Local Speculative Decoding for Image Generation: Elia Peruzzo,

Guillaume Sautière,

Amirhossein Habibian; [pdf] [supp]
[bibtex]
@InProceedings{Peruzzo_2026_CVPR,
  author    = {Peruzzo, Elia and Sauti{\`e}re, Guillaume and Habibian, Amirhossein},
  title     = {Multi-Scale Local Speculative Decoding for Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5253--5262},
}
Fully Decentralized Certified Unlearning: Hithem Lamri,

Michail Maniatakos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lamri_2026_CVPR,
  author    = {Lamri, Hithem and Maniatakos, Michail},
  title     = {Fully Decentralized Certified Unlearning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24577--24586},
}
CG-Floor: Centroid-Guided Diffusion for Large-Scale Floorplan Generation: Hongjin Lian,

Jian Ma,

Hongjie Chen,

Jia Li,

Ruizhen Hu,

Yu-Kun Lai,

Kun Li; [pdf] [supp]
[bibtex]
@InProceedings{Lian_2026_CVPR,
  author    = {Lian, Hongjin and Ma, Jian and Chen, Hongjie and Li, Jia and Hu, Ruizhen and Lai, Yu-Kun and Li, Kun},
  title     = {{CG-Floor}: Centroid-Guided Diffusion for Large-Scale Floorplan Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18354--18363},
}
CoV-Align: Efficient Fine-grained Cross-Modal Alignment with Cohesive Visual Semantics Priority: Hengqi Liu,

Wanting Zhou,

Longteng Kong,

Fangxiang Feng,

Lei Ren,

Wei Chen,

Xiaojie Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Hengqi and Zhou, Wanting and Kong, Longteng and Feng, Fangxiang and Ren, Lei and Chen, Wei and Wang, Xiaojie},
  title     = {{CoV-Align}: Efficient Fine-grained Cross-Modal Alignment with Cohesive Visual Semantics Priority},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36828--36837},
}
Question-guided Visual Compression with Memory Feedback for Long-Term Video Understanding: Sosuke Yamao,

Natsuki Miyahara,

Yuankai Qi,

Shun Takeuchi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yamao_2026_CVPR,
  author    = {Yamao, Sosuke and Miyahara, Natsuki and Qi, Yuankai and Takeuchi, Shun},
  title     = {Question-guided Visual Compression with Memory Feedback for Long-Term Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32850--32859},
}
A Difference-in-Difference Approach to Detecting AI-Generated Images: Xinyi Qi,

Kai Ye,

Chengchun Shi,

Ying Yang,

Jin Zhu,

Hongyi Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Xinyi and Ye, Kai and Shi, Chengchun and Yang, Ying and Zhu, Jin and Zhou, Hongyi},
  title     = {A Difference-in-Difference Approach to Detecting {AI}-Generated Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42964--42975},
}
Probabilistic Discrepancy Learning for Roadside LiDAR Scene Completion: Xiaogang Wu,

Jinchao Hu,

Zixian Wang,

Dun Liu,

BoXiang Cheng,

Yiqiang Wu; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xiaogang and Hu, Jinchao and Wang, Zixian and Liu, Dun and Cheng, BoXiang and Wu, Yiqiang},
  title     = {Probabilistic Discrepancy Learning for Roadside {LiDAR} Scene Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9955--9964},
}
CF-IPT: Cross-Modal Fusion Interactive Prompt Tuning of Vision-Language Pre-Trained Model for Multisource Remote Sensing Data Classification: Jinheng Ji,

Jiahui Qu,

Wenqian Dong,

Yunsong Li; [pdf] [supp]
[bibtex]
@InProceedings{Ji_2026_CVPR,
  author    = {Ji, Jinheng and Qu, Jiahui and Dong, Wenqian and Li, Yunsong},
  title     = {{CF-IPT}: Cross-Modal Fusion Interactive Prompt Tuning of Vision-Language Pre-Trained Model for Multisource Remote Sensing Data Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23021--23030},
}
CompetitorFormer: Mitigating Query Conflicts for 3D Instance Segmentation via Competitive Strategy: Duanchu Wang,

Junjie Yang,

Haoran Gong,

Jing Liu,

Di Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Duanchu and Yang, Junjie and Gong, Haoran and Liu, Jing and Wang, Di},
  title     = {{CompetitorFormer}: Mitigating Query Conflicts for {3D} Instance Segmentation via Competitive Strategy},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34724--34733},
}
Ego-1K - A Large-Scale Multiview Video Dataset for Egocentric Vision: Jae Yong Lee,

Daniel Scharstein,

Akash Bapat,

Hao Hu,

Andrew Fu,

Haoru Zhao,

Paul Sammut,

Xiang Li,

Stephen Jeapes,

Anik Gupta,

Lior David,

Saketh Madhuvarasu,

Jay Girish Joshi,

Jason Wither; [pdf] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Jae Yong and Scharstein, Daniel and Bapat, Akash and Hu, Hao and Fu, Andrew and Zhao, Haoru and Sammut, Paul and Li, Xiang and Jeapes, Stephen and Gupta, Anik and David, Lior and Madhuvarasu, Saketh and Joshi, Jay Girish and Wither, Jason},
  title     = {{Ego-1K} - A Large-Scale Multiview Video Dataset for Egocentric Vision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19854--19863},
}
LensWalk: Agentic Video Understanding by Planning How You See in Videos: Keliang Li,

Yansong Li,

Hongze Shen,

Mengdi Liu,

Hong Chang,

Shiguang Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Keliang and Li, Yansong and Shen, Hongze and Liu, Mengdi and Chang, Hong and Shan, Shiguang},
  title     = {{LensWalk}: Agentic Video Understanding by Planning How You See in Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19518--19528},
}
Hilbert Curve-Based Attention Enabling Topology-Preserving Image Tensor Representation for Semantic Segmentation Network: Linkang Xu,

Gang Li,

Yue Song,

Xiangxin Ji; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Linkang and Li, Gang and Song, Yue and Ji, Xiangxin},
  title     = {{Hilbert} Curve-Based Attention Enabling Topology-Preserving Image Tensor Representation for Semantic Segmentation Network},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13113--13122},
}
Ego-Grounding for Personalized Question-Answering in Egocentric Videos: Junbin Xiao,

Shenglang Zhang,

Pengxiang Zhu,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Junbin and Zhang, Shenglang and Zhu, Pengxiang and Yao, Angela},
  title     = {Ego-Grounding for Personalized Question-Answering in Egocentric Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40537--40547},
}
Gaussian Mapping for Evolving Scenes: Vladimir Yugay,

Thies Kersten,

Luca Carlone,

Theo Gevers,

Martin R. Oswald,

Lukas Schmid; [pdf] [arXiv]
[bibtex]
@InProceedings{Yugay_2026_CVPR,
  author    = {Yugay, Vladimir and Kersten, Thies and Carlone, Luca and Gevers, Theo and Oswald, Martin R. and Schmid, Lukas},
  title     = {{Gaussian} Mapping for Evolving Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18903--18912},
}
MERIT: Multi-domain Efficient RAW Image Translation: Wenjun Huang,

Shenghao Fu,

Yian Jin,

Yang Ni,

Ziteng Cui,

Hanning Chen,

Yirui He,

Yezi Liu,

Sanggeon Yun,

SungHeon Jeong,

Ryozo Masukawa,

William Youngwoo Chung,

Mohsen Imani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Wenjun and Fu, Shenghao and Jin, Yian and Ni, Yang and Cui, Ziteng and Chen, Hanning and He, Yirui and Liu, Yezi and Yun, Sanggeon and Jeong, SungHeon and Masukawa, Ryozo and Chung, William Youngwoo and Imani, Mohsen},
  title     = {{MERIT}: Multi-domain Efficient {RAW} Image Translation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37216--37225},
}
Memory-Efficient Transfer Learning with Fading Side Networks via Masked Dual Path Distillation: Yutong Zhang,

Jiaxin Chen,

Honglin Chen,

Kaiqi Zheng,

Shengcai Liao,

Hanwen Zhong,

Weixin Li,

Yunhong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yutong and Chen, Jiaxin and Chen, Honglin and Zheng, Kaiqi and Liao, Shengcai and Zhong, Hanwen and Li, Weixin and Wang, Yunhong},
  title     = {Memory-Efficient Transfer Learning with Fading Side Networks via Masked Dual Path Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25043--25054},
}
LAOF: Robust Latent Action Learning with Optical Flow Constraints: Xizhou Bu,

Jiexi Lyu,

Fulei Sun,

Ruichen Yang,

Zhiqiang Ma,

Wei Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bu_2026_CVPR,
  author    = {Bu, Xizhou and Lyu, Jiexi and Sun, Fulei and Yang, Ruichen and Ma, Zhiqiang and Li, Wei},
  title     = {{LAOF}: Robust Latent Action Learning with Optical Flow Constraints},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27334--27344},
}
Language-driven Fine-grained Retrieval: Shijie Wang,

Xin Yu,

Yadan Luo,

Zijian Wang,

Pengfei Zhang,

Zi Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shijie and Yu, Xin and Luo, Yadan and Wang, Zijian and Zhang, Pengfei and Huang, Zi},
  title     = {Language-driven Fine-grained Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2682--2692},
}
Seeing Depth Through Frequency and Motion: A Progressive Training Paradigm for Monocular Depth Estimation: Ke Li,

Bolin Song,

Hongbo Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Ke and Song, Bolin and Liu, Hongbo},
  title     = {Seeing Depth Through Frequency and Motion: A Progressive Training Paradigm for Monocular Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26845--26854},
}
X-AVDT: Audio-Visual Cross-Attention for Robust Deepfake Detection: Youngseo Kim,

Kwan Yun,

Seokhyeon Hong,

Sihun Cha,

Colette Suhjung Koo,

Junyong Noh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Youngseo and Yun, Kwan and Hong, Seokhyeon and Cha, Sihun and Koo, Colette Suhjung and Noh, Junyong},
  title     = {{X-AVDT}: Audio-Visual Cross-Attention for Robust Deepfake Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4403--4414},
}
SURF: Signature-Retained Fast Video Generation: Kaixin Ding,

Xi Chen,

Sihui Ji,

Yuan Gao,

Liang Hou,

Xin Tao,

Hengshuang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Kaixin and Chen, Xi and Ji, Sihui and Gao, Yuan and Hou, Liang and Tao, Xin and Zhao, Hengshuang},
  title     = {{SURF}: Signature-Retained Fast Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9171--9181},
}
See, Think, Act: Teaching Multimodal Agents to Effectively Interact with GUI by Identifying Toggles: Zongru Wu,

Rui Mao,

Zhiyuan Tian,

Pengzhou Cheng,

Tianjie Ju,

Zheng Wu,

Lingzhong Dong,

Haiyue Sheng,

Zhuosheng Zhang,

Gongshen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zongru and Mao, Rui and Tian, Zhiyuan and Cheng, Pengzhou and Ju, Tianjie and Wu, Zheng and Dong, Lingzhong and Sheng, Haiyue and Zhang, Zhuosheng and Liu, Gongshen},
  title     = {See, Think, Act: Teaching Multimodal Agents to Effectively Interact with {GUI} by Identifying Toggles},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27536--27546},
}
FeatureFool: Zero-Query Fooling of Video Models via Feature Map: Duoxun Tang,

Xi Xiao,

Guangwu Hu,

Kangkang Sun,

Xiao Yang,

Dongyang Chen,

Qing Li,

Yong-jie Yin,

Jiyao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Duoxun and Xiao, Xi and Hu, Guangwu and Sun, Kangkang and Yang, Xiao and Chen, Dongyang and Li, Qing and Yin, Yong-jie and Wang, Jiyao},
  title     = {{FeatureFool}: Zero-Query Fooling of Video Models via Feature Map},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42268--42279},
}
A Temporal and Content Co-Awareness Latent Diffusion for Controllable Hand Image Generation: Shuang Hao,

Pengfei Ren,

Haifeng Sun,

Ting Pan,

Qi Qi,

Lei Zhang,

Cong Liu,

Jianxin Liao,

Jingyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Hao_2026_CVPR,
  author    = {Hao, Shuang and Ren, Pengfei and Sun, Haifeng and Pan, Ting and Qi, Qi and Zhang, Lei and Liu, Cong and Liao, Jianxin and Wang, Jingyu},
  title     = {A Temporal and Content Co-Awareness Latent Diffusion for Controllable Hand Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38324--38334},
}
StyleGallery: Training-free and Semantic-aware Personalized Style Transfer from Arbitrary Image References: Boyu He,

Yunfan Ye,

Chang Liu,

Weishang Wu,

Fang Liu,

Zhiping Cai; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Boyu and Ye, Yunfan and Liu, Chang and Wu, Weishang and Liu, Fang and Cai, Zhiping},
  title     = {{StyleGallery}: Training-free and Semantic-aware Personalized Style Transfer from Arbitrary Image References},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29092--29102},
}
Meta-Learning In-Context Enables Training-Free Cross Subject Brain Decoding: Mu Nan,

Muquan Yu,

Weijian Mai,

Jacob S. Prince,

Hossein Adeli,

Rui Zhang,

Jiahang Cao,

Benjamin Becker,

John A. Pyles,

Margaret M. Henderson,

Chunfeng Song,

Nikolaus Kriegeskorte,

Michael J. Tarr,

Xiaoqing Hu,

Andrew F. Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nan_2026_CVPR,
  author    = {Nan, Mu and Yu, Muquan and Mai, Weijian and Prince, Jacob S. and Adeli, Hossein and Zhang, Rui and Cao, Jiahang and Becker, Benjamin and Pyles, John A. and Henderson, Margaret M. and Song, Chunfeng and Kriegeskorte, Nikolaus and Tarr, Michael J. and Hu, Xiaoqing and Luo, Andrew F.},
  title     = {Meta-Learning In-Context Enables Training-Free Cross Subject Brain Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3604--3616},
}
fMRI-LM: Towards a Universal Foundation Model for Language-Aligned fMRI Understanding: Yuxiang Wei,

Yanteng Zhang,

Xi Xiao,

Chengxuan Qian,

Tianyang Wang,

Vince D. Calhoun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Yuxiang and Zhang, Yanteng and Xiao, Xi and Qian, Chengxuan and Wang, Tianyang and Calhoun, Vince D.},
  title     = {{fMRI-LM}: Towards a Universal Foundation Model for Language-Aligned {fMRI} Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6931--6940},
}
CLEX: Complementary Label Exchange Learning for Noisy Facial Expression Recognition: Lin Wang,

Fang Liu,

Xiaofen Xing,

Kailing Guo,

Xiangmin Xu; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Lin and Liu, Fang and Xing, Xiaofen and Guo, Kailing and Xu, Xiangmin},
  title     = {{CLEX}: Complementary Label Exchange Learning for Noisy Facial Expression Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10577--10586},
}
OneHOI: Unifying Human-Object Interaction Generation and Editing: Jiun Tian Hoe,

Weipeng Hu,

Xudong Jiang,

Yap-Peng Tan,

Chee Seng Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hoe_2026_CVPR,
  author    = {Hoe, Jiun Tian and Hu, Weipeng and Jiang, Xudong and Tan, Yap-Peng and Chan, Chee Seng},
  title     = {{OneHOI}: Unifying Human-Object Interaction Generation and Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7664--7673},
}
Reevaluating the Intra-Modal Misalignment Hypothesis in CLIP: Jonas Herzog,

Yue Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Herzog_2026_CVPR,
  author    = {Herzog, Jonas and Wang, Yue},
  title     = {Reevaluating the Intra-Modal Misalignment Hypothesis in {CLIP}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24310--24319},
}
Rel-Zero: Harnessing Patch-Pair Invariance for Robust Zero-Watermarking Against AI Editing: Pengzhen Chen,

Yanwei Liu,

Xiaoyan Gu,

Xiaojun Chen,

Wu Liu,

Weiping Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Pengzhen and Liu, Yanwei and Gu, Xiaoyan and Chen, Xiaojun and Liu, Wu and Wang, Weiping},
  title     = {{Rel-Zero}: Harnessing Patch-Pair Invariance for Robust Zero-Watermarking Against {AI} Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3337--3346},
}
Joint Spectral Image Reconstruction and Semantic Segmentation with Cooperative Unfolding: Zijun He,

Ping Wang,

Xiaodong Wang,

Chang Chen,

Xin Yuan; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Zijun and Wang, Ping and Wang, Xiaodong and Chen, Chang and Yuan, Xin},
  title     = {Joint Spectral Image Reconstruction and Semantic Segmentation with Cooperative Unfolding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6910--6919},
}
NOWA: Null-space Optical Watermark for Invisible Capture Fingerprinting and Tamper Localization: Edwin Vargas,

Jhon Lopez,

Henry Arguello,

Ashok Veeraraghavan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vargas_2026_CVPR,
  author    = {Vargas, Edwin and Lopez, Jhon and Arguello, Henry and Veeraraghavan, Ashok},
  title     = {{NOWA}: Null-space Optical Watermark for Invisible Capture Fingerprinting and Tamper Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {102--112},
}
Representation-Steered Incremental Adapter-Tuning for Class-Incremental Learning with Pre-Trained Models: Jiarui Zhao,

Libo Huang,

Xiangqi Li,

Zhulin An,

Chuanguang Yang,

Yu Wang,

Boyu Diao,

Yongjun Xu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Jiarui and Huang, Libo and Li, Xiangqi and An, Zhulin and Yang, Chuanguang and Wang, Yu and Diao, Boyu and Xu, Yongjun},
  title     = {Representation-Steered Incremental Adapter-Tuning for Class-Incremental Learning with Pre-Trained Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18010--18020},
}
HOG-Layout: Hierarchical 3D Scene Generation, Optimization and Editing via Vision-Language Models: Haiyan Jiang,

Deyu Zhang,

Dongdong Weng,

Weitao Song,

Henry Been-Lirn Duh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Haiyan and Zhang, Deyu and Weng, Dongdong and Song, Weitao and Duh, Henry Been-Lirn},
  title     = {{HOG-Layout}: Hierarchical {3D} Scene Generation, Optimization and Editing via Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31586--31596},
}
SeD-UD: An Influence-Driven and Hierarchically-Decoupled Information Bottleneck for Multimodal Intent Recognition: Qin Li,

Wenbo Zhang,

Limei Liu,

Han Peng,

Junfeng Yang,

Guanying Xu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Qin and Zhang, Wenbo and Liu, Limei and Peng, Han and Yang, Junfeng and Xu, Guanying},
  title     = {{SeD-UD}: An Influence-Driven and Hierarchically-Decoupled Information Bottleneck for Multimodal Intent Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30346--30356},
}
When Visualizing is the First Step to Reasoning: MIRA, a Benchmark for Visual Chain-of-Thought: Yiyang Zhou,

Haoqin Tu,

Zijun Wang,

Zeyu Wang,

Niklas Muennighoff,

Fan Nie,

Chaorui Deng,

Shen Yan,

Haoqi Fan,

Yejin Choi,

James Zou,

Cihang Xie,

Huaxiu Yao,

Qinghao Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Yiyang and Tu, Haoqin and Wang, Zijun and Wang, Zeyu and Muennighoff, Niklas and Nie, Fan and Deng, Chaorui and Yan, Shen and Fan, Haoqi and Choi, Yejin and Zou, James and Xie, Cihang and Yao, Huaxiu and Ye, Qinghao},
  title     = {When Visualizing is the First Step to Reasoning: {MIRA}, a Benchmark for Visual Chain-of-Thought},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26154--26164},
}
Towards Reliable Evaluation of Adversarial Robustness for Spiking Neural Networks: Jihang Wang,

Dongcheng Zhao,

Ruolin Chen,

Qian Zhang,

Yi Zeng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jihang and Zhao, Dongcheng and Chen, Ruolin and Zhang, Qian and Zeng, Yi},
  title     = {Towards Reliable Evaluation of Adversarial Robustness for Spiking Neural Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20691--20700},
}
Spectral Scalpel: Amplifying Adjacent Action Discrepancy via Frequency-Selective Filtering for Skeleton-Based Action Segmentation: Haoyu Ji,

Bowen Chen,

Zhihao Yang,

Wenze Huang,

Yu Gao,

Xueting Liu,

Weihong Ren,

Zhiyong Wang,

Honghai Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2026_CVPR,
  author    = {Ji, Haoyu and Chen, Bowen and Yang, Zhihao and Huang, Wenze and Gao, Yu and Liu, Xueting and Ren, Weihong and Wang, Zhiyong and Liu, Honghai},
  title     = {Spectral Scalpel: Amplifying Adjacent Action Discrepancy via Frequency-Selective Filtering for Skeleton-Based Action Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12849--12859},
}
SemiGDA: Generative Dual-distribution Alignment for Semi-Supervised Medical Image Segmentation: Kaiwen Huang,

Yi Zhou,

Yizhe Zhang,

Jingxiong Li,

Tao Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Kaiwen and Zhou, Yi and Zhang, Yizhe and Li, Jingxiong and Zhou, Tao},
  title     = {{SemiGDA}: Generative Dual-distribution Alignment for Semi-Supervised Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1450--1460},
}
DuetSVG: Unified Multimodal SVG Generation with Internal Visual Guidance: Peiying Zhang,

Nanxuan Zhao,

Matthew Fisher,

Yiran Xu,

Jing Liao,

Difan Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Peiying and Zhao, Nanxuan and Fisher, Matthew and Xu, Yiran and Liao, Jing and Liu, Difan},
  title     = {{DuetSVG}: Unified Multimodal {SVG} Generation with Internal Visual Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10219--10229},
}
FACE: A Face-based Autoregressive Representation for High-Fidelity and Efficient Mesh Generation: Hanxiao Wang,

Yuan-Chen Guo,

Ying-Tian Liu,

Zi-Xin Zou,

Biao Zhang,

Weize Quan,

Ding Liang,

Yan-Pei Cao,

Dong-Ming Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Hanxiao and Guo, Yuan-Chen and Liu, Ying-Tian and Zou, Zi-Xin and Zhang, Biao and Quan, Weize and Liang, Ding and Cao, Yan-Pei and Yan, Dong-Ming},
  title     = {{FACE}: A Face-based Autoregressive Representation for High-Fidelity and Efficient Mesh Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12719--12729},
}
FAAR: Efficient Frequency-Aware Multi-Task Fine-Tuning via Automatic Rank Selection: Maxime Fontana,

Michael Spratling,

Miaojing Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fontana_2026_CVPR,
  author    = {Fontana, Maxime and Spratling, Michael and Shi, Miaojing},
  title     = {{FAAR}: Efficient Frequency-Aware Multi-Task Fine-Tuning via Automatic Rank Selection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31135--31144},
}
MatE: Material Extraction from Single-Image via Geometric Prior: Zeyu Zhang,

Wei Zhai,

Jian Yang,

Yang Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zeyu and Zhai, Wei and Yang, Jian and Cao, Yang},
  title     = {{MatE}: Material Extraction from Single-Image via Geometric Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12480--12490},
}
GeoWorld: Geometric World Models: Zeyu Zhang,

Danning Li,

Ian Reid,

Richard Hartley; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zeyu and Li, Danning and Reid, Ian and Hartley, Richard},
  title     = {{GeoWorld}: Geometric World Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30952--30963},
}
MeshMosaic: Scaling Artist Mesh Generation via Local-to-Global Assembly: Rui Xu,

Tianyang Xue,

Qiujie Dong,

Le Wan,

Zhe Zhu,

Peng Li,

Zhiyang Dou,

Cheng Lin,

Shiqing Xin,

Yuan Liu,

Wenping Wang,

Taku Komura; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Rui and Xue, Tianyang and Dong, Qiujie and Wan, Le and Zhu, Zhe and Li, Peng and Dou, Zhiyang and Lin, Cheng and Xin, Shiqing and Liu, Yuan and Wang, Wenping and Komura, Taku},
  title     = {{MeshMosaic}: Scaling Artist Mesh Generation via Local-to-Global Assembly},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20003--20013},
}
GenMatter: Perceiving Physical Objects with Generative Matter Models: Eric Li,

Arijit Dasgupta,

Yoni Friedman,

Mathieu Huot,

Vikash Mansinghka,

Thomas O'Connell,

William T. Freeman,

Joshua B. Tenenbaum; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Eric and Dasgupta, Arijit and Friedman, Yoni and Huot, Mathieu and Mansinghka, Vikash and O'Connell, Thomas and Freeman, William T. and Tenenbaum, Joshua B.},
  title     = {{GenMatter}: Perceiving Physical Objects with Generative Matter Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3165--3175},
}
IFCSR: Inference-Free Fidelity-Realism Control for One-Step Diffusion-based Real-World Image Super-Resolution: Jonghee Back,

Jongju Kim,

Jeong-Uk Kim,

Eunjin Kim,

Minyong Jeon; [pdf] [supp]
[bibtex]
@InProceedings{Back_2026_CVPR,
  author    = {Back, Jonghee and Kim, Jongju and Kim, Jeong-Uk and Kim, Eunjin and Jeon, Minyong},
  title     = {{IFCSR}: Inference-Free Fidelity-Realism Control for One-Step Diffusion-based Real-World Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38187--38197},
}
SPDMark: Selective Parameter Displacement for Robust Video Watermarking: Samar Fares,

Nurbek Tastan,

Karthik Nandakumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fares_2026_CVPR,
  author    = {Fares, Samar and Tastan, Nurbek and Nandakumar, Karthik},
  title     = {{SPDMark}: Selective Parameter Displacement for Robust Video Watermarking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10303--10312}
}
4D Local Modeling Toward Dynamic Global Perception for Ambiguity-free Rotation-Invariant Point Cloud Analysis: Jiaxun Guo,

Wentao Fan,

Manar Amayri,

Nizar Bouguila; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Jiaxun and Fan, Wentao and Amayri, Manar and Bouguila, Nizar},
  title     = {{4D} Local Modeling Toward Dynamic Global Perception for Ambiguity-free Rotation-Invariant Point Cloud Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31440--31449}
}
OS-Oracle: A Comprehensive Framework for Cross-Platform GUI Critic Models: Zhenyu Wu,

Jingjing Xie,

Zehao Li,

Bowen Yang,

Qiushi Sun,

Zhaoyang Liu,

Zhoumianze Liu,

Yu Qiao,

Xiangyu Yue,

Zun Wang,

Zichen Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zhenyu and Xie, Jingjing and Li, Zehao and Yang, Bowen and Sun, Qiushi and Liu, Zhaoyang and Liu, Zhoumianze and Qiao, Yu and Yue, Xiangyu and Wang, Zun and Ding, Zichen},
  title     = {{OS-Oracle}: A Comprehensive Framework for Cross-Platform {GUI} Critic Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27514--27524}
}
Seele: A Unified Acceleration Framework for Real-Time Gaussian Splatting on Mobile Devices: He Zhu,

Xiaotong Huang,

Zihan Liu,

Weikai Lin,

Xiaohong Liu,

Zhezhi He,

Jingwen Leng,

Minyi Guo,

Yu Feng; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, He and Huang, Xiaotong and Liu, Zihan and Lin, Weikai and Liu, Xiaohong and He, Zhezhi and Leng, Jingwen and Guo, Minyi and Feng, Yu},
  title     = {Seele: A Unified Acceleration Framework for Real-Time {Gaussian} Splatting on Mobile Devices},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25979--25989}
}
Mocap-2-to-3: Multi-view Lifting for Monocular Motion Recovery with 2D Pretraining: Zhumei Wang,

Zechen Hu,

Ruoxi Guo,

Huaijin Pi,

Ziyong Feng,

Liang Zhang,

Mingtao Pei,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhumei and Hu, Zechen and Guo, Ruoxi and Pi, Huaijin and Feng, Ziyong and Zhang, Liang and Pei, Mingtao and Huang, Siyuan},
  title     = {Mocap-2-to-3: Multi-view Lifting for Monocular Motion Recovery with {2D} Pretraining},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42869--42878}
}
Seeing What Matters: A Training-Free Self-Guided Framework for Multimodal Detail Perception and Reasoning: Mingjie Ma,

Yichao Ma,

Zhong Yang,

Guohui Li; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Mingjie and Ma, Yichao and Yang, Zhong and Li, Guohui},
  title     = {Seeing What Matters: A Training-Free Self-Guided Framework for Multimodal Detail Perception and Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8727--8736}
}
LayoutAD: Exploring Semantic-Geometric Misalignment Reasoning for Scene Layout Anomaly Detection: Zhichao Zeng,

Jiasheng Zhang,

Jiyun Sun,

Jiangtao Cui,

Xiaotian Qiao; [pdf] [supp]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Zhichao and Zhang, Jiasheng and Sun, Jiyun and Cui, Jiangtao and Qiao, Xiaotian},
  title     = {{LayoutAD}: Exploring Semantic-Geometric Misalignment Reasoning for Scene Layout Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35567--35576}
}
MedMO: Grounding and Understanding Multimodal Large Language Model for Medical Images: Ankan Deria,

Komal Kumar,

Adinath Madhavrao Dukre,

Eran Segal,

Salman Khan,

Imran Razzak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deria_2026_CVPR,
  author    = {Deria, Ankan and Kumar, Komal and Dukre, Adinath Madhavrao and Segal, Eran and Khan, Salman and Razzak, Imran},
  title     = {{MedMO}: Grounding and Understanding Multimodal Large Language Model for Medical Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5092--5103}
}
Stitch-a-Demo: Creating Video Demonstrations from Multistep Descriptions: Chi Hsuan Wu,

Kumar Ashutosh,

Kristen Grauman; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Chi Hsuan and Ashutosh, Kumar and Grauman, Kristen},
  title     = {{Stitch-a-Demo}: Creating Video Demonstrations from Multistep Descriptions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23988--23999}
}
Few-Step Diffusion Sampling Through Instance-Aware Discretizations: Liangyu Yuan,

Ruoyu Wang,

Tong Zhao,

Dingwen Fu,

Mingkun Lei,

Beier Zhu,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Liangyu and Wang, Ruoyu and Zhao, Tong and Fu, Dingwen and Lei, Mingkun and Zhu, Beier and Zhang, Chi},
  title     = {Few-Step Diffusion Sampling Through Instance-Aware Discretizations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35882--35892}
}
Revisiting 3D Reconstruction Kernels as Low-Pass Filters: Shengjun Zhang,

Min Chen,

Yibo Wei,

Mingyu Dong,

Yueqi Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Shengjun and Chen, Min and Wei, Yibo and Dong, Mingyu and Duan, Yueqi},
  title     = {Revisiting {3D} Reconstruction Kernels as Low-Pass Filters},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33374--33383}
}
Mining Instance-Centric Vision-Language Contexts for Human-Object Interaction Detection: Soo Won Seo,

KyungChae Lee,

Hyungchan Cho,

Taein Son,

Nam Ik Cho,

Jun Won Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Soo Won and Lee, KyungChae and Cho, Hyungchan and Son, Taein and Cho, Nam Ik and Choi, Jun Won},
  title     = {Mining Instance-Centric Vision-Language Contexts for Human-Object Interaction Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40418--40427}
}
P2GS: Physical Prior-guided Gaussian Splatting for Photometrically Consistent Urban Reconstruction: Kota Shimomura,

Hidehisa Arai,

Tsubasa Takahashi,

Takayoshi Yamashita,

Hironobu Fujiyoshi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shimomura_2026_CVPR,
  author    = {Shimomura, Kota and Arai, Hidehisa and Takahashi, Tsubasa and Yamashita, Takayoshi and Fujiyoshi, Hironobu},
  title     = {{P2GS}: Physical Prior-guided {Gaussian} Splatting for Photometrically Consistent Urban Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11736--11745}
}
UNI-OOD: Unified Object- and Image-level Out-of-Distribution Detection via Cross-Context Attentive Vision-Language Modeling: Yuchuan Li,

Azadeh Motamedi,

Hyock Ju Kwon,

Chul B Park,

Il-Min Kim; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yuchuan and Motamedi, Azadeh and Kwon, Hyock Ju and Park, Chul B and Kim, Il-Min},
  title     = {{UNI-OOD}: Unified Object- and Image-level Out-of-Distribution Detection via Cross-Context Attentive Vision-Language Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6282--6292}
}
SpatialVID: A Large-Scale Video Dataset with Spatial Annotations: Jiahao Wang,

Yufeng Yuan,

Rujie Zheng,

Youtian Lin,

Jian Gao,

Lin-Zhuo Chen,

Yajie Bao,

Chang Zeng,

Yanxi Zhou,

Xiao-Xiao Long,

Hao Zhu,

Zhaoxiang Zhang,

Xun Cao,

Yao Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jiahao and Yuan, Yufeng and Zheng, Rujie and Lin, Youtian and Gao, Jian and Chen, Lin-Zhuo and Bao, Yajie and Zeng, Chang and Zhou, Yanxi and Long, Xiao-Xiao and Zhu, Hao and Zhang, Zhaoxiang and Cao, Xun and Yao, Yao},
  title     = {{SpatialVID}: A Large-Scale Video Dataset with Spatial Annotations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42592--42603}
}
Air-Know: Arbiter-Calibrated Knowledge-Internalizing Robust Network for Composed Image Retrieval: Zhiheng Fu,

Yupeng Hu,

Qianyun Yang,

Shiqi Zhang,

Zhiwei Chen,

Zixu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Zhiheng and Hu, Yupeng and Yang, Qianyun and Zhang, Shiqi and Chen, Zhiwei and Li, Zixu},
  title     = {{Air-Know}: Arbiter-Calibrated Knowledge-Internalizing Robust Network for Composed Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2658--2670}
}
MeshSplatting: Differentiable Rendering with Opaque Meshes: Jan Held,

Sanghyun Son,

Renaud Vandeghen,

Daniel Rebain,

Matheus Gadelha,

Yi Zhou,

Anthony Cioppa,

Ming C. Lin,

Marc Van Droogenbroeck,

Andrea Tagliasacchi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Held_2026_CVPR,
  author    = {Held, Jan and Son, Sanghyun and Vandeghen, Renaud and Rebain, Daniel and Gadelha, Matheus and Zhou, Yi and Cioppa, Anthony and Lin, Ming C. and Van Droogenbroeck, Marc and Tagliasacchi, Andrea},
  title     = {{MeshSplatting}: Differentiable Rendering with Opaque Meshes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7320--7329}
}
ReasonMap: Towards Fine-Grained Visual Reasoning from Transit Maps: Sicheng Feng,

Song Wang,

Shuyi Ouyang,

Lingdong Kong,

Zikai Song,

Jianke Zhu,

Huan Wang,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Sicheng and Wang, Song and Ouyang, Shuyi and Kong, Lingdong and Song, Zikai and Zhu, Jianke and Wang, Huan and Wang, Xinchao},
  title     = {{ReasonMap}: Towards Fine-Grained Visual Reasoning from Transit Maps},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41077--41088}
}
3D sans 3D Scans: Scalable Pre-training from Video-Generated Point Clouds: Ryousuke Yamada,

Kohsuke Ide,

Yoshihiro Fukuhara,

Hirokatsu Kataoka,

Gilles Puy,

Andrei Bursuc,

Yuki M. Asano; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yamada_2026_CVPR,
  author    = {Yamada, Ryousuke and Ide, Kohsuke and Fukuhara, Yoshihiro and Kataoka, Hirokatsu and Puy, Gilles and Bursuc, Andrei and Asano, Yuki M.},
  title     = {{3D} sans {3D} Scans: Scalable Pre-training from Video-Generated Point Clouds},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39075--39085}
}
DK-DDIL: Adaptive Knowledge Retention for Dynamic Domain-Incremental Learning in Medical Imaging: Yuxi Ma,

Sujie Liu,

Jing Yang,

Jiacheng Wang,

Yiping Chen,

Baptiste Magnier,

Liansheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Yuxi and Liu, Sujie and Yang, Jing and Wang, Jiacheng and Chen, Yiping and Magnier, Baptiste and Wang, Liansheng},
  title     = {{DK-DDIL}: Adaptive Knowledge Retention for Dynamic Domain-Incremental Learning in Medical Imaging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36290--36299}
}
CUPID: Generative 3D Reconstruction via Joint Object and Pose Modeling: Binbin Huang,

Haobin Duan,

Yiqun Zhao,

Zibo Zhao,

Yi Ma,

Shenghua Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Binbin and Duan, Haobin and Zhao, Yiqun and Zhao, Zibo and Ma, Yi and Gao, Shenghua},
  title     = {{CUPID}: Generative {3D} Reconstruction via Joint Object and Pose Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12741--12752}
}
Gaussian Splatting-based Low-Rank Tensor Representation for Multi-Dimensional Image Recovery: Yiming Zeng,

Xi-Le Zhao,

Wei-Hao Wu,

Teng-Yu Ji,

Chao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Yiming and Zhao, Xi-Le and Wu, Wei-Hao and Ji, Teng-Yu and Wang, Chao},
  title     = {Gaussian Splatting-based Low-Rank Tensor Representation for Multi-Dimensional Image Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19360--19369}
}
Semantic-Guided Global-Local Collaborative Prompt Learning for Few-Shot Class Incremental Learning: Yongxin Yan,

Weisen Chen,

Xingye Chen,

Yuanjie Shao,

Zhengrong Zuo,

Wenming Tan,

Wenqi Ren,

Changxin Gao,

Nong Sang; [pdf]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Yongxin and Chen, Weisen and Chen, Xingye and Shao, Yuanjie and Zuo, Zhengrong and Tan, Wenming and Ren, Wenqi and Gao, Changxin and Sang, Nong},
  title     = {Semantic-Guided Global-Local Collaborative Prompt Learning for Few-Shot Class Incremental Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5511--5520}
}
Improving Adversarial Transferability with Local Perturbation Augmentation: Jian-Xun Mi,

Xuanhui Zhong,

Weisheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Mi_2026_CVPR,
  author    = {Mi, Jian-Xun and Zhong, Xuanhui and Li, Weisheng},
  title     = {Improving Adversarial Transferability with Local Perturbation Augmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20639--20649}
}
MUST: Modality-Specific Representation-Aware Transformer for Diffusion-Enhanced Survival Prediction with Missing Modality: Kyungwon Kim,

Dosik Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Kyungwon and Hwang, Dosik},
  title     = {{MUST}: Modality-Specific Representation-Aware Transformer for Diffusion-Enhanced Survival Prediction with Missing Modality},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30312--30321}
}
FM-Steer: Enhance Generalist Policies with Value-Guided Cascaded Denoising: Haoming Song,

Delin Qu,

Yuanqi Yao,

Qizhi Chen,

Jiarui Li,

Qi Lv,

Yiwen Tang,

Li Kang,

Heng Zhou,

Xianqiang Gao,

Yuhang Tang,

Xiaofan Li,

Modi Shi,

Guanghui Ren,

Maoqing Yao,

Bin Zhao,

Dong Wang,

Xuelong Li; [pdf] [supp]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Haoming and Qu, Delin and Yao, Yuanqi and Chen, Qizhi and Li, Jiarui and Lv, Qi and Tang, Yiwen and Kang, Li and Zhou, Heng and Gao, Xianqiang and Tang, Yuhang and Li, Xiaofan and Shi, Modi and Ren, Guanghui and Yao, Maoqing and Zhao, Bin and Wang, Dong and Li, Xuelong},
  title     = {{FM-Steer}: Enhance Generalist Policies with Value-Guided Cascaded Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13407--13418}
}
Diff-SemiER: Transparency-Aware Adaptive Fusion Diffusion Model with Generative Prior for Semi-Transparent Eyeglasses Removal: Jiahao Li,

Shiqi Yin,

Zhenxiang Lian,

Jingtao Guo; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiahao and Yin, Shiqi and Lian, Zhenxiang and Guo, Jingtao},
  title     = {{Diff-SemiER}: Transparency-Aware Adaptive Fusion Diffusion Model with Generative Prior for Semi-Transparent Eyeglasses Removal},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30813--30822}
}
LASER: Layer-wise Scale Alignment for Training-Free Streaming 4D Reconstruction: Tianye Ding,

Yiming Xie,

Yiqing Liang,

Moitreya Chatterjee,

Pedro Miraldo,

Huaizu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Tianye and Xie, Yiming and Liang, Yiqing and Chatterjee, Moitreya and Miraldo, Pedro and Jiang, Huaizu},
  title     = {{LASER}: Layer-wise Scale Alignment for Training-Free Streaming {4D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36433--36443}
}
Hierarchical Codec Diffusion for Video-to-Speech Generation: Jiaxin Ye,

Gaoxiang Cong,

Chenhui Wang,

Xin-Cheng Wen,

Zhaoyang Li,

Boyuan Cao,

Hongming Shan; [pdf] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Jiaxin and Cong, Gaoxiang and Wang, Chenhui and Wen, Xin-Cheng and Li, Zhaoyang and Cao, Boyuan and Shan, Hongming},
  title     = {Hierarchical Codec Diffusion for Video-to-Speech Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43352--43362}
}
FlowFixer: Towards Detail-Preserving Subject-Driven Generation: Jinyoung Jun,

Won-Dong Jang,

Wenbin Ouyang,

Raghudeep Gadde,

Jungbeom Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jun_2026_CVPR,
  author    = {Jun, Jinyoung and Jang, Won-Dong and Ouyang, Wenbin and Gadde, Raghudeep and Lee, Jungbeom},
  title     = {{FlowFixer}: Towards Detail-Preserving Subject-Driven Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22049--22058}
}
Efficient Encoder-Free Fourier-based 3D Large Multimodal Model: Guofeng Mei,

Wei Lin,

Luigi Riz,

Yujiao Wu,

Yiming Wang,

Fabio Poiesi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mei_2026_CVPR,
  author    = {Mei, Guofeng and Lin, Wei and Riz, Luigi and Wu, Yujiao and Wang, Yiming and Poiesi, Fabio},
  title     = {Efficient Encoder-Free {Fourier}-based {3D} Large Multimodal Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23785--23794}
}
Scal3R: Scalable Test-Time Training for Large-Scale 3D Reconstruction: Tao Xie,

Peishan Yang,

Yudong Jin,

Yingfeng Cai,

Wei Yin,

Weiqiang Ren,

Qian Zhang,

Wei Hua,

Sida Peng,

Xiaoyang Guo,

Xiaowei Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Tao and Yang, Peishan and Jin, Yudong and Cai, Yingfeng and Yin, Wei and Ren, Weiqiang and Zhang, Qian and Hua, Wei and Peng, Sida and Guo, Xiaoyang and Zhou, Xiaowei},
  title     = {{Scal3R}: Scalable Test-Time Training for Large-Scale {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21760--21771}
}
SparVAR: Exploring Sparsity in Visual AutoRegressive Modeling for Training-Free Acceleration: Zekun Li,

Ning Wang,

Tongxin Bai,

Changwang Mei,

Peisong Wang,

Shuang Qiu,

Jian Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zekun and Wang, Ning and Bai, Tongxin and Mei, Changwang and Wang, Peisong and Qiu, Shuang and Cheng, Jian},
  title     = {{SparVAR}: Exploring Sparsity in Visual {AutoRegressive} Modeling for Training-Free Acceleration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19308--19318}
}
LLM-Guided Probabilistic Fusion for Label-Efficient Document Layout Analysis: Ibne Farabi Shihab,

Sanjeda Akter,

Anuj Sharma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shihab_2026_CVPR,
  author    = {Shihab, Ibne Farabi and Akter, Sanjeda and Sharma, Anuj},
  title     = {{LLM}-Guided Probabilistic Fusion for Label-Efficient Document Layout Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3574--3583}
}
Head-wise Adaptive Rotary Positional Encoding for Fine-Grained Image Generation: Jiaye Li,

Baoyou Chen,

Hui Li,

Zilong Dong,

Jingdong Wang,

Siyu Zhu; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiaye and Chen, Baoyou and Li, Hui and Dong, Zilong and Wang, Jingdong and Zhu, Siyu},
  title     = {Head-wise Adaptive Rotary Positional Encoding for Fine-Grained Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26338--26347}
}
SO-Bench: A Structural Output Evaluation of Multimodal LLM: Di Feng,

Kaixin Ma,

Feng Nan,

Haofeng Chen,

Bohan Zhai,

David Griffiths,

Mingfei Gao,

Zhe Gan,

Eshan Verma,

Yinfei Yang,

Zhifeng Chen,

Afshin Dehghan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Di and Ma, Kaixin and Nan, Feng and Chen, Haofeng and Zhai, Bohan and Griffiths, David and Gao, Mingfei and Gan, Zhe and Verma, Eshan and Yang, Yinfei and Chen, Zhifeng and Dehghan, Afshin},
  title     = {{SO-Bench}: A Structural Output Evaluation of Multimodal {LLM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37694--37704}
}
Eulerian Gaussian Splatting using Hashed Probability Pyramids: Mia Gaia Polansky,

George Kopanas,

Stephan Garbin,

Todd Zickler,

Dor Verbin; [pdf] [supp]
[bibtex]
@InProceedings{Polansky_2026_CVPR,
  author    = {Polansky, Mia Gaia and Kopanas, George and Garbin, Stephan and Zickler, Todd and Verbin, Dor},
  title     = {Eulerian {Gaussian} Splatting using Hashed Probability Pyramids},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19045--19053}
}
Real-Time Neural Video Compression with Unified Intra and Inter Coding: Hui Xiang,

Yifan Bian,

Li Li,

Jingran Wu,

Xianguo Zhang,

Dong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Hui and Bian, Yifan and Li, Li and Wu, Jingran and Zhang, Xianguo and Liu, Dong},
  title     = {Real-Time Neural Video Compression with Unified Intra and Inter Coding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35217--35226}
}
RebRL: Reinforcing Discrete Visual Diffusion Models with Rebalanced Timestep Credits: Mu Zhang,

Tianren Ma,

Yunfan Liu,

Kun Hu,

Qixiang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Mu and Ma, Tianren and Liu, Yunfan and Hu, Kun and Ye, Qixiang},
  title     = {{RebRL}: Reinforcing Discrete Visual Diffusion Models with Rebalanced Timestep Credits},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43135--43144}
}
COPO: Causal-Oriented Policy Optimization for Hallucinations of MLLMs: Peizheng Guo,

Jingyao Wang,

Wenwen Qiang,

Jiahuan Zhou,

Changwen Zheng,

Gang Hua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Peizheng and Wang, Jingyao and Qiang, Wenwen and Zhou, Jiahuan and Zheng, Changwen and Hua, Gang},
  title     = {{COPO}: Causal-Oriented Policy Optimization for Hallucinations of {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11051--11063}
}
Task-Aware Image Signal Processor for Advanced Visual Perception: Kai Chen,

Jin Xiao,

Leheng Zhang,

Kexuan Shi,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Kai and Xiao, Jin and Zhang, Leheng and Shi, Kexuan and Gu, Shuhang},
  title     = {Task-Aware Image Signal Processor for Advanced Visual Perception},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33672--33681}
}
AutoTraces: Autoregressive Trajectory Forecasting via Multimodal Large Language Models: Teng Wang,

Yanting Lu,

Ruize Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Teng and Lu, Yanting and Wang, Ruize},
  title     = {{AutoTraces}: Autoregressive Trajectory Forecasting via Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4054--4064}
}
Think Visually, Reason Textually: Vision-Language Synergy in Abstract Reasoning: Beichen Zhang,

Yuhang Zang,

Xiaoyi Dong,

Yuhang Cao,

Haodong Duan,

Dahua Lin,

Jiaqi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Beichen and Zang, Yuhang and Dong, Xiaoyi and Cao, Yuhang and Duan, Haodong and Lin, Dahua and Wang, Jiaqi},
  title     = {Think Visually, Reason Textually: Vision-Language Synergy in Abstract Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41203--41212}
}
UniLight: A Unified Representation for Lighting: Zitian Zhang,

Iliyan Georgiev,

Michael Fischer,

Yannick Hold-Geoffroy,

Jean-Francois Lalonde,

Valentin Deschaintre; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zitian and Georgiev, Iliyan and Fischer, Michael and Hold-Geoffroy, Yannick and Lalonde, Jean-Francois and Deschaintre, Valentin},
  title     = {{UniLight}: A Unified Representation for Lighting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29684--29694}
}
Lyapunov Probes for Hallucination Detection in Large Foundation Models: Bozhi Luan,

Gen Li,

Yalan Qin,

Jifeng Guo,

Yun Zhou,

Faguo Wu,

Hongwei Zheng,

Wenjun Wu,

Zhaoxin Fan; [pdf] [arXiv]
[bibtex]
@InProceedings{Luan_2026_CVPR,
  author    = {Luan, Bozhi and Li, Gen and Qin, Yalan and Guo, Jifeng and Zhou, Yun and Wu, Faguo and Zheng, Hongwei and Wu, Wenjun and Fan, Zhaoxin},
  title     = {Lyapunov Probes for Hallucination Detection in Large Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25336--25346}
}
Do VLMs Perceive or Recall? Probing Visual Perception vs. Memory with Classic Visual Illusions: Xiaoxiao Sun,

Mingyang Li,

Kun Yuan,

Min Woo Sun,

Mark Endo,

Shengguang Wu,

Changlin Li,

Yuhui Zhang,

Zeyu Wang,

Serena Yeung-Levy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Xiaoxiao and Li, Mingyang and Yuan, Kun and Sun, Min Woo and Endo, Mark and Wu, Shengguang and Li, Changlin and Zhang, Yuhui and Wang, Zeyu and Yeung-Levy, Serena},
  title     = {Do {VLMs} Perceive or Recall? {Probing} Visual Perception vs. Memory with Classic Visual Illusions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25861--25870}
}
Bidirectional Cross-Modal Prompting for Event-Frame Asymmetric Stereo: Ninghui Xu,

Fabio Tosi,

Lihui Wang,

Jiawei Han,

Luca Bartolomei,

Zhiting Yao,

Matteo Poggi,

Stefano Mattoccia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Ninghui and Tosi, Fabio and Wang, Lihui and Han, Jiawei and Bartolomei, Luca and Yao, Zhiting and Poggi, Matteo and Mattoccia, Stefano},
  title     = {Bidirectional Cross-Modal Prompting for Event-Frame Asymmetric Stereo},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {914--925}
}
MaskDexGrasp: Generative Masked Modeling for Part-Aware Dexterous Grasp Synthesis: Binghui Zuo,

Lin Zhou,

Haoxuan Xu,

Jianan Yan,

Zhipeng Yu,

Zekai Liu,

Yangang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zuo_2026_CVPR,
  author    = {Zuo, Binghui and Zhou, Lin and Xu, Haoxuan and Yan, Jianan and Yu, Zhipeng and Liu, Zekai and Wang, Yangang},
  title     = {{MaskDexGrasp}: Generative Masked Modeling for Part-Aware Dexterous Grasp Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29598--29609}
}
Disco-GS: Gaussian Splatting in Dynamic Color Lighting: Ashish Kumar,

A. N. Rajagopalan; [pdf] [supp]
[bibtex]
@InProceedings{Kumar_2026_CVPR,
  author    = {Kumar, Ashish and Rajagopalan, A. N.},
  title     = {{Disco-GS}: {Gaussian} Splatting in Dynamic Color Lighting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11891--11900}
}
Infinity-RoPE: Action-Controllable Infinite Video Generation Emerges From Autoregressive Self-Rollout: Hidir Yesiltepe,

Tuna Meral,

Adil Kaan Akan,

Kaan Oktay,

Pinar Yanardag; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yesiltepe_2026_CVPR,
  author    = {Yesiltepe, Hidir and Meral, Tuna and Akan, Adil Kaan and Oktay, Kaan and Yanardag, Pinar},
  title     = {{Infinity-RoPE}: Action-Controllable Infinite Video Generation Emerges From Autoregressive Self-Rollout},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40256--40265}
}
Learning Latent Transmission and Glare Maps for Lens Veiling Glare Removal: Xiaolong Qian,

Qi Jiang,

Lei Sun,

Zongxi Yu,

Kailun Yang,

Peixuan Wu,

Jiacheng Zhou,

Yao Gao,

Yaoguang Ma,

Ming-Hsuan Yang,

Kaiwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Xiaolong and Jiang, Qi and Sun, Lei and Yu, Zongxi and Yang, Kailun and Wu, Peixuan and Zhou, Jiacheng and Gao, Yao and Ma, Yaoguang and Yang, Ming-Hsuan and Wang, Kaiwei},
  title     = {Learning Latent Transmission and Glare Maps for Lens Veiling Glare Removal},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33995--34005}
}
PureCC: Pure Learning for Text-to-Image Concept Customization: Zhichao Liao,

Xiaole Xian,

Qingyu Li,

Wenyu Qin,

Meng Wang,

Weicheng Xie,

Siyang Song,

Pingfa Feng,

Long Zeng,

Liang Pan; [pdf] [arXiv]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Zhichao and Xian, Xiaole and Li, Qingyu and Qin, Wenyu and Wang, Meng and Xie, Weicheng and Song, Siyang and Feng, Pingfa and Zeng, Long and Pan, Liang},
  title     = {{PureCC}: Pure Learning for Text-to-Image Concept Customization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7730--7740}
}
UnicEdit-10M: A Dataset and Benchmark Breaking the Scale-Quality Barrier via Unified Verification for Reasoning-Enriched Edits: Keming Ye,

Zhipeng Huang,

Canmiao Fu,

Qingyang Liu,

Jiani Cai,

Zheqi Lv,

Chen Li,

Jing LYU,

Zhou Zhao,

Shengyu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Ye_2026_CVPR,
  author    = {Ye, Keming and Huang, Zhipeng and Fu, Canmiao and Liu, Qingyang and Cai, Jiani and Lv, Zheqi and Li, Chen and LYU, Jing and Zhao, Zhou and Zhang, Shengyu},
  title     = {{UnicEdit-10M}: A Dataset and Benchmark Breaking the Scale-Quality Barrier via Unified Verification for Reasoning-Enriched Edits},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37279--37289},
}
R$^2$TUA: Reconstruction-residual Based Targeted and Untargeted Attack Against Text-Image Person Re-Identification: Yubo Wang,

Yan Lu,

Bin Liu,

Xulin Li,

Jixiang Niu; [pdf] [supp]
[bibtex]
@inproceedings{Wang_2026_CVPR,
  author    = {Wang, Yubo and Lu, Yan and Liu, Bin and Li, Xulin and Niu, Jixiang},
  title     = {{R$^2$TUA}: Reconstruction-residual Based Targeted and Untargeted Attack Against Text-Image Person Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22857--22866},
}
U$^2$Flow: Uncertainty-Aware Unsupervised Optical Flow Estimation: Xunpei Sun,

Wenwei Lin,

Yi Chang,

Gang Chen; [pdf] [supp]
[bibtex]
@inproceedings{Sun_2026_CVPR,
  author    = {Sun, Xunpei and Lin, Wenwei and Chang, Yi and Chen, Gang},
  title     = {{U$^2$Flow}: Uncertainty-Aware Unsupervised Optical Flow Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28690--28700},
}
Back to Basics: Let Denoising Generative Models Denoise: Tianhong Li,

Kaiming He; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Li_2026_CVPR,
  author    = {Li, Tianhong and He, Kaiming},
  title     = {Back to Basics: Let Denoising Generative Models Denoise},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36115--36125},
}
PvP: Data-Efficient Humanoid Robot Learning with Proprioceptive-Privileged Contrastive Representations: Mingqi Yuan,

Tao Yu,

Haolin Song,

Bo Li,

Xin Jin,

Hua Chen,

Wenjun Zeng; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Yuan_2026_CVPR,
  author    = {Yuan, Mingqi and Yu, Tao and Song, Haolin and Li, Bo and Jin, Xin and Chen, Hua and Zeng, Wenjun},
  title     = {{PvP}: Data-Efficient Humanoid Robot Learning with Proprioceptive-Privileged Contrastive Representations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42375--42385},
}
Scaling Dense Event-Stream Pretraining from Visual Foundation Models: Zhiwen Chen,

Junhui Hou,

Zhiyu Zhu,

Jinjian Wu,

Guangming Shi; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Chen_2026_CVPR,
  author    = {Chen, Zhiwen and Hou, Junhui and Zhu, Zhiyu and Wu, Jinjian and Shi, Guangming},
  title     = {Scaling Dense Event-Stream Pretraining from Visual Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8011--8022},
}
SemLT3D: Semantic-Guided Expert Distillation for Camera-only Long-Tailed 3D Object Detection: Hao Vo,

Khoa Vo,

Thinh Phan,

Ngo Xuan Cuong,

Gianfranco Doretto,

Hien Nguyen,

Anh Nguyen,

Ngan Le; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Vo_2026_CVPR,
  author    = {Vo, Hao and Vo, Khoa and Phan, Thinh and Cuong, Ngo Xuan and Doretto, Gianfranco and Nguyen, Hien and Nguyen, Anh and Le, Ngan},
  title     = {{SemLT3D}: Semantic-Guided Expert Distillation for Camera-only Long-Tailed {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25788--25798},
}
When Do Models Actually Decide? Mapping the Layer-Wise Decision Timeline in Pretrained Neural Networks: Minhyeok Lee; [pdf] [supp]
[bibtex]
@inproceedings{Lee_2026_CVPR,
  author    = {Lee, Minhyeok},
  title     = {When Do Models Actually Decide? Mapping the Layer-Wise Decision Timeline in Pretrained Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34546--34554},
}
CoWTracker: Tracking by Warping instead of Correlation: Zihang Lai,

Eldar Insafutdinov,

Edgar Sucar,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Lai_2026_CVPR,
  author    = {Lai, Zihang and Insafutdinov, Eldar and Sucar, Edgar and Vedaldi, Andrea},
  title     = {{CoWTracker}: Tracking by Warping instead of Correlation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42571--42580},
}
Understanding Temporal Logic Consistency in Video-Language Models through Cross-Modal Attention Discriminability: Chengzhi Li,

Heyan Huang,

Ping Jian,

Zhen Yang,

Yaning Tian,

Zhongbin Guo; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Li_2026_CVPR,
  author    = {Li, Chengzhi and Huang, Heyan and Jian, Ping and Yang, Zhen and Tian, Yaning and Guo, Zhongbin},
  title     = {Understanding Temporal Logic Consistency in Video-Language Models through Cross-Modal Attention Discriminability},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31812--31821},
}
CapNav: Benchmarking Vision Language Models on Capability-conditioned Indoor Navigation: Xia Su,

Ruiqi Chen,

Benlin Liu,

Jingwei Ma,

Zonglin Di,

Ranjay Krishna,

Jon Froehlich; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Su_2026_CVPR,
  author    = {Su, Xia and Chen, Ruiqi and Liu, Benlin and Ma, Jingwei and Di, Zonglin and Krishna, Ranjay and Froehlich, Jon},
  title     = {{CapNav}: Benchmarking Vision Language Models on Capability-conditioned Indoor Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4043--4053},
}
Where, What, Why: Toward Explainable 3D-GS Watermarking: Mingshu Cai,

Jiajun Li,

Osamu Yoshie,

Yuya Ieiri,

Yixuan Li; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Cai_2026_CVPR,
  author    = {Cai, Mingshu and Li, Jiajun and Yoshie, Osamu and Ieiri, Yuya and Li, Yixuan},
  title     = {Where, What, Why: Toward Explainable {3D-GS} Watermarking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20701--20710},
}
WEAVE: Unleashing and Benchmarking the In-context Interleaved Comprehension and Generation: Wei Chow,

Jiachun Pan,

Yongyuan Liang,

Mingze Zhou,

Xue Song,

Liyu Jia,

Saining Zhang,

Siliang Tang,

Juncheng Li,

Fengda Zhang,

Weijia Wu,

Hanwang Zhang,

Tat-Seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Chow_2026_CVPR,
  author    = {Chow, Wei and Pan, Jiachun and Liang, Yongyuan and Zhou, Mingze and Song, Xue and Jia, Liyu and Zhang, Saining and Tang, Siliang and Li, Juncheng and Zhang, Fengda and Wu, Weijia and Zhang, Hanwang and Chua, Tat-Seng},
  title     = {{WEAVE}: Unleashing and Benchmarking the In-context Interleaved Comprehension and Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15343--15353},
}
Rascene: High-Fidelity 3D Scene Imaging with mmWave Communication Signals: Kunzhe Song,

Geo Jie Zhou,

Xiaoming Liu,

Huacheng Zeng; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Song_2026_CVPR,
  author    = {Song, Kunzhe and Zhou, Geo Jie and Liu, Xiaoming and Zeng, Huacheng},
  title     = {Rascene: High-Fidelity {3D} Scene Imaging with {mmWave} Communication Signals},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36454--36463},
}
Where Culture Fades: Revealing the Cultural Gap in Text-to-Image Generation: Chuancheng Shi,

Shangze Li,

Shiming Guo,

Simiao Xie,

Wenhua Wu,

Jingtong Dou,

Chao Wu,

Canran Xiao,

Cong Wang,

Zifeng Cheng,

Fei Shen,

Tat-Seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Shi_2026_CVPR,
  author    = {Shi, Chuancheng and Li, Shangze and Guo, Shiming and Xie, Simiao and Wu, Wenhua and Dou, Jingtong and Wu, Chao and Xiao, Canran and Wang, Cong and Cheng, Zifeng and Shen, Fei and Chua, Tat-Seng},
  title     = {Where Culture Fades: Revealing the Cultural Gap in Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14702--14712},
}
Back to Point: Exploring Point-Language Models for Zero-Shot 3D Anomaly Detection: Kaiqiang Li,

Gang Li,

Mingle Zhou,

Min Li,

Delong Han,

Jin Wan; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Li_2026_CVPR,
  author    = {Li, Kaiqiang and Li, Gang and Zhou, Mingle and Li, Min and Han, Delong and Wan, Jin},
  title     = {Back to Point: Exploring Point-Language Models for Zero-Shot {3D} Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14167--14177},
}
Depth Hypothesis Guided Iterative Refinement for Event-Image Monocular Depth Estimation: Daikun Liu,

Teng Wang,

Changyin Sun; [pdf] [supp]
[bibtex]
@inproceedings{Liu_2026_CVPR,
  author    = {Liu, Daikun and Wang, Teng and Sun, Changyin},
  title     = {Depth Hypothesis Guided Iterative Refinement for Event-Image Monocular Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29504--29513},
}
Transition Matching Distillation for Fast Video Generation: Weili Nie,

Julius Berner,

Nanye Ma,

Chao Liu,

Saining Xie,

Arash Vahdat; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Nie_2026_CVPR,
  author    = {Nie, Weili and Berner, Julius and Ma, Nanye and Liu, Chao and Xie, Saining and Vahdat, Arash},
  title     = {Transition Matching Distillation for Fast Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4645--4655},
}
MoCapAnything: Unified 3D Motion Capture for Arbitrary Skeletons from Monocular Videos: Kehong Gong,

Zhengyu Wen,

Weixia He,

Mingxi Xu,

Qi Wang,

Ning Zhang,

Zhengyu Li,

Dongze Lian,

Wei Zhao,

Xiaoyu He,

Mingyuan Zhang; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Gong_2026_CVPR,
  author    = {Gong, Kehong and Wen, Zhengyu and He, Weixia and Xu, Mingxi and Wang, Qi and Zhang, Ning and Li, Zhengyu and Lian, Dongze and Zhao, Wei and He, Xiaoyu and Zhang, Mingyuan},
  title     = {{MoCapAnything}: Unified {3D} Motion Capture for Arbitrary Skeletons from Monocular Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7089--7099},
}
SPE-MVS: Spatial Position Encoding Enhanced Multi-View Stereo with Monocular Depth Priors: Shaoqian Wang,

Jiadai Sun,

Bosen Hou,

Qiang Wang,

Bin Fan,

Bo Li,

Bin Lu,

Yuchao Dai; [pdf] [supp]
[bibtex]
@inproceedings{Wang_2026_CVPR,
  author    = {Wang, Shaoqian and Sun, Jiadai and Hou, Bosen and Wang, Qiang and Fan, Bin and Li, Bo and Lu, Bin and Dai, Yuchao},
  title     = {{SPE-MVS}: Spatial Position Encoding Enhanced Multi-View Stereo with Monocular Depth Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14536--14545},
}
SDUIE: Semi-Supervised Diffusion for Underwater Image Enhancement with Quant-Text Dual Control: Xiaofeng Cong,

Yu-Xin Zhang,

Hao Shen,

Yeying Jin,

Junming Hou,

Jie Gui; [pdf] [supp]
[bibtex]
@inproceedings{Cong_2026_CVPR,
  author    = {Cong, Xiaofeng and Zhang, Yu-Xin and Shen, Hao and Jin, Yeying and Hou, Junming and Gui, Jie},
  title     = {{SDUIE}: Semi-Supervised Diffusion for Underwater Image Enhancement with Quant-Text Dual Control},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37423--37433},
}
No Calibration, No Depth, No Problem: Cross-Sensor View Synthesis with 3D Consistency: Cho-Ying Wu,

Zixun Huang,

Xinyu Huang,

Liu Ren; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Wu_2026_CVPR,
  author    = {Wu, Cho-Ying and Huang, Zixun and Huang, Xinyu and Ren, Liu},
  title     = {No Calibration, No Depth, No Problem: Cross-Sensor View Synthesis with {3D} Consistency},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21836--21848},
}
From Static to Dynamic: Exploring Self-supervised Image-to-Video Representation Transfer Learning: Yang Liu,

Qianqian Xu,

Peisong Wen,

Siran Dai,

Xilin Zhao,

Qingming Huang; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Liu_2026_CVPR,
  author    = {Liu, Yang and Xu, Qianqian and Wen, Peisong and Dai, Siran and Zhao, Xilin and Huang, Qingming},
  title     = {From Static to Dynamic: Exploring Self-supervised Image-to-Video Representation Transfer Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31250--31261},
}
Transition Models: Rethinking the Generative Learning Objective: Zidong Wang,

Yiyuan Zhang,

Xiaoyu Yue,

Xiangyu Yue,

Yangguang Li,

Wanli Ouyang,

Lei Bai; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Wang_2026_CVPR,
  author    = {Wang, Zidong and Zhang, Yiyuan and Yue, Xiaoyu and Yue, Xiangyu and Li, Yangguang and Ouyang, Wanli and Bai, Lei},
  title     = {Transition Models: Rethinking the Generative Learning Objective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29178--29189},
}
Unified Spatiotemporal Token Compression for Video-LLMs at Ultra-Low Retention: Junhao Du,

Jialong Xue,

Anqi Li,

Jincheng Dai,

Guo Lu; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Du_2026_CVPR,
  author    = {Du, Junhao and Xue, Jialong and Li, Anqi and Dai, Jincheng and Lu, Guo},
  title     = {Unified Spatiotemporal Token Compression for Video-{LLMs} at Ultra-Low Retention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17661--17671},
}
Hybrid Robust Collaborative Perception with LiDAR-4D Radar Fusion under Adverse Weather Conditions: Yuquan Yang,

Hui Zhang,

Wenyu Lu,

Ziyin Zhang,

Chuanming Zhang,

Xiaohua Xu; [pdf]
[bibtex]
@inproceedings{Yang_2026_CVPR,
  author    = {Yang, Yuquan and Zhang, Hui and Lu, Wenyu and Zhang, Ziyin and Zhang, Chuanming and Xu, Xiaohua},
  title     = {Hybrid Robust Collaborative Perception with {LiDAR}-{4D} Radar Fusion under Adverse Weather Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24237--24247},
}
Bootstrapping Multi-view Learning for Test-time Noisy Correspondence: Changhao He,

Di Xue,

Shuxian Li,

Yanji Hao,

Xi Peng,

Peng Hu; [pdf] [supp]
[bibtex]
@inproceedings{He_2026_CVPR,
  author    = {He, Changhao and Xue, Di and Li, Shuxian and Hao, Yanji and Peng, Xi and Hu, Peng},
  title     = {Bootstrapping Multi-view Learning for Test-time Noisy Correspondence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1627--1638},
}
M3DLayout: A Multi-Source Dataset of 3D Indoor Layouts and Structured Descriptions for 3D Generation: Yiheng Zhang,

Zhuojiang Cai,

Mingdao Wang,

Meitong Guo,

Tianxiao Li,

Li Lin,

Yuwang Wang; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yiheng and Cai, Zhuojiang and Wang, Mingdao and Guo, Meitong and Li, Tianxiao and Lin, Li and Wang, Yuwang},
  title     = {{M3DLayout}: A Multi-Source Dataset of {3D} Indoor Layouts and Structured Descriptions for {3D} Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34217--34226},
}
Octopus: History-Free Gradient Orthogonalization for Continual Learning in Multimodal Large Language Models: Yuehao Liu,

Shanyan Guan,

Weijia Zhang,

Xuanming Shang,

Yanhao Ge,

Wei Li,

Chao Ma; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Liu_2026_CVPR,
  author    = {Liu, Yuehao and Guan, Shanyan and Zhang, Weijia and Shang, Xuanming and Ge, Yanhao and Li, Wei and Ma, Chao},
  title     = {Octopus: History-Free Gradient Orthogonalization for Continual Learning in Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3898--3907},
}
MM-OVSeg: Multimodal Optical-SAR Fusion for Open-Vocabulary Segmentation in Remote Sensing: Yimin Wei,

Aoran Xiao,

Hongruixuan Chen,

Junshi Xia,

Naoto Yokoya; [pdf] [supp]
[bibtex]
@inproceedings{Wei_2026_CVPR,
  author    = {Wei, Yimin and Xiao, Aoran and Chen, Hongruixuan and Xia, Junshi and Yokoya, Naoto},
  title     = {{MM-OVSeg}: Multimodal Optical-{SAR} Fusion for Open-Vocabulary Segmentation in Remote Sensing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42202--42212},
}
Depth Any Endoscopy: Towards Self-Supervised Generalizable Depth Estimation in Monocular Endoscopy: Shuwei Shao,

Kejin Zhu,

Shixing Ma,

Xinzhe Du,

Baochang Zhang,

Zhe Min; [pdf] [supp]
[bibtex]
@inproceedings{Shao_2026_CVPR,
  author    = {Shao, Shuwei and Zhu, Kejin and Ma, Shixing and Du, Xinzhe and Zhang, Baochang and Min, Zhe},
  title     = {Depth Any Endoscopy: Towards Self-Supervised Generalizable Depth Estimation in Monocular Endoscopy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34126--34137},
}
Guiding a Diffusion Model by Swapping Its Tokens: Weijia Zhang,

Yuehao Liu,

Shanyan Guan,

Wu Ran,

Yanhao Ge,

Wei Li,

Chao Ma; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Zhang_2026_CVPR,
  author    = {Zhang, Weijia and Liu, Yuehao and Guan, Shanyan and Ran, Wu and Ge, Yanhao and Li, Wei and Ma, Chao},
  title     = {Guiding a Diffusion Model by Swapping Its Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14263--14272},
}
CLiViS: Unleashing Cognitive Map through Linguistic-Visual Synergy for Embodied Visual Reasoning: Kailing Li,

Qi'ao Xu,

Tianwen Qian,

Yuqian Fu,

Yang Jiao,

Xiaoling Wang; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Li_2026_CVPR,
  author    = {Li, Kailing and Xu, Qi'ao and Qian, Tianwen and Fu, Yuqian and Jiao, Yang and Wang, Xiaoling},
  title     = {{CLiViS}: Unleashing Cognitive Map through Linguistic-Visual Synergy for Embodied Visual Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5134--5143},
}
CARD: A Multi-Modal Automotive Dataset for Dense 3D Reconstruction in Challenging Road Topography: Gasser Elazab,

Frank Neuhaus,

Tilman Koß,

Malte Splietker,

Aditya Date,

Michael Unterreiner,

Maximilian Jansen,

Olaf Hellwich; [pdf] [supp]
[bibtex]
@inproceedings{Elazab_2026_CVPR,
  author    = {Elazab, Gasser and Neuhaus, Frank and Ko{\ss}, Tilman and Splietker, Malte and Date, Aditya and Unterreiner, Michael and Jansen, Maximilian and Hellwich, Olaf},
  title     = {{CARD}: A Multi-Modal Automotive Dataset for Dense {3D} Reconstruction in Challenging Road Topography},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17820--17830},
}
Next-Scale Prediction: A Self-Supervised Approach for Real-World Image Denoising: Yiwen Shan,

Haiyu Zhao,

Peng Hu,

Xi Peng,

Yuanbiao Gou; [pdf] [arXiv]
[bibtex]
@inproceedings{Shan_2026_CVPR,
  author    = {Shan, Yiwen and Zhao, Haiyu and Hu, Peng and Peng, Xi and Gou, Yuanbiao},
  title     = {Next-Scale Prediction: A Self-Supervised Approach for Real-World Image Denoising},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22669--22678},
}
TEAR: Temporal-aware Automated Red-teaming for Text-to-Video Models: Jiaming He,

Guanyu Hou,

Hongwei Li,

Zhicong Huang,

Kangjie Chen,

Yi Yu,

Wenbo Jiang,

Guowen Xu,

Tianwei Zhang; [pdf] [arXiv]
[bibtex]
@inproceedings{He_2026_CVPR,
  author    = {He, Jiaming and Hou, Guanyu and Li, Hongwei and Huang, Zhicong and Chen, Kangjie and Yu, Yi and Jiang, Wenbo and Xu, Guowen and Zhang, Tianwei},
  title     = {{TEAR}: Temporal-aware Automated Red-teaming for Text-to-Video Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41--50},
}
GGPT: Geometry-Grounded Point Transformer: Yutong Chen,

Yiming Wang,

Xucong Zhang,

Sergey Prokudin,

Siyu Tang; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Chen_2026_CVPR,
  author    = {Chen, Yutong and Wang, Yiming and Zhang, Xucong and Prokudin, Sergey and Tang, Siyu},
  title     = {{GGPT}: Geometry-Grounded Point Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28959--28968},
}
Bulk RNA-seq Guided Multi-modal Detection of Anomalous Regions in Human Cancer via Spatial Transcriptomics: Hang Shi,

Ruocheng Yang,

Wenjie You,

Zhilin Huang,

Daoqiang Zhang,

Wei Shao; [pdf] [supp]
[bibtex]
@inproceedings{Shi_2026_CVPR,
  author    = {Shi, Hang and Yang, Ruocheng and You, Wenjie and Huang, Zhilin and Zhang, Daoqiang and Shao, Wei},
  title     = {Bulk {RNA}-seq Guided Multi-modal Detection of Anomalous Regions in Human Cancer via Spatial Transcriptomics},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41815--41825},
}
Mitigating Simplicity Bias in OOD Detection through Object Co-occurrence Analysis: Boyang Dai,

Chaoqi Chen,

Yizhou Yu; [pdf] [supp]
[bibtex]
@inproceedings{Dai_2026_CVPR,
  author    = {Dai, Boyang and Chen, Chaoqi and Yu, Yizhou},
  title     = {Mitigating Simplicity Bias in {OOD} Detection through Object Co-occurrence Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20345--20355},
}
UIKA: Fast Universal Head Avatar from Pose-Free Images: Zijian Wu,

Boyao Zhou,

Liangxiao Hu,

Hongyu Liu,

Yuan Sun,

Xuan Wang,

Xun Cao,

Yujun Shen,

Hao Zhu; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Wu_2026_CVPR,
  author    = {Wu, Zijian and Zhou, Boyao and Hu, Liangxiao and Liu, Hongyu and Sun, Yuan and Wang, Xuan and Cao, Xun and Shen, Yujun and Zhu, Hao},
  title     = {{UIKA}: Fast Universal Head Avatar from Pose-Free Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18216--18228},
}
GroundingME: Exposing the Visual Grounding Gap in MLLMs through Multi-Dimensional Evaluation: Rang Li,

Lei Li,

Shuhuai Ren,

Hao Tian,

Shuhao Gu,

Shicheng Li,

Zihao Yue,

Yudong Wang,

Wenhan Ma,

Zhe Yang,

Jingyuan Ma,

Zhifang Sui,

Fuli Luo; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Li_2026_CVPR,
  author    = {Li, Rang and Li, Lei and Ren, Shuhuai and Tian, Hao and Gu, Shuhao and Li, Shicheng and Yue, Zihao and Wang, Yudong and Ma, Wenhan and Yang, Zhe and Ma, Jingyuan and Sui, Zhifang and Luo, Fuli},
  title     = {{GroundingME}: Exposing the Visual Grounding Gap in {MLLMs} through Multi-Dimensional Evaluation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2412--2422},
}
Aesthetic Camera Viewpoint Suggestion with 3D Aesthetic Field: Sheyang Tang,

Armin Shafiee Sarvestani,

Jialu Xu,

Xiaoyu Xu,

Zhou Wang; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Tang_2026_CVPR,
  author    = {Tang, Sheyang and Sarvestani, Armin Shafiee and Xu, Jialu and Xu, Xiaoyu and Wang, Zhou},
  title     = {Aesthetic Camera Viewpoint Suggestion with {3D} Aesthetic Field},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8278--8287},
}
AGFT: Alignment-Guided Fine-Tuning for Zero-Shot Adversarial Robustness of Vision-Language Models: Yubo Cui,

Xianchao Guan,

Zijun Xiong,

Zheng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Cui_2026_CVPR,
  author    = {Cui, Yubo and Guan, Xianchao and Xiong, Zijun and Zhang, Zheng},
  title     = {{AGFT}: Alignment-Guided Fine-Tuning for Zero-Shot Adversarial Robustness of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22836--22846},
}
GEM-TFL: Bridging Weak and Full Supervision for Forgery Localization through EM-Guided Decomposition and Temporal Refinement: Xiaodong Zhu,

Yuanming Zheng,

Suting Wang,

Junqi Yang,

Yuhong Yang,

Weiping Tu,

Zhongyuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Zhu_2026_CVPR,
  author    = {Zhu, Xiaodong and Zheng, Yuanming and Wang, Suting and Yang, Junqi and Yang, Yuhong and Tu, Weiping and Wang, Zhongyuan},
  title     = {{GEM-TFL}: Bridging Weak and Full Supervision for Forgery Localization through {EM}-Guided Decomposition and Temporal Refinement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42932--42941},
}
Coupling Liquid Time-Constant Encoders with Modern Hopfield Memory: Bishal Ranjan Swain,

Kyung Joo Cheoi,

Jaepil Ko; [pdf]
[bibtex]
@inproceedings{Swain_2026_CVPR,
  author    = {Swain, Bishal Ranjan and Cheoi, Kyung Joo and Ko, Jaepil},
  title     = {Coupling Liquid Time-Constant Encoders with Modern {Hopfield} Memory},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27409--27417},
}
OmniGen2: Towards Instruction-Aligned Multimodal Generation: Chenyuan Wu,

Jiahao Wang,

Pengfei Zheng,

Ruiran Yan,

Shitao Xiao,

Xin Luo,

Yueze Wang,

Wanli Li,

Xiyan Jiang,

Yexin Liu,

Junjie Zhou,

Ziyi Xia,

Ze Liu,

Chaofan Li,

Haoge Deng,

Kun Luo,

Bo Zhang,

Jiajun Zhang,

Dong Liu,

Defu Lian,

Xinlong Wang,

Zhongyuan Wang,

Tiejun Huang,

Zheng Liu; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Wu_2026_CVPR,
  author    = {Wu, Chenyuan and Wang, Jiahao and Zheng, Pengfei and Yan, Ruiran and Xiao, Shitao and Luo, Xin and Wang, Yueze and Li, Wanli and Jiang, Xiyan and Liu, Yexin and Zhou, Junjie and Xia, Ziyi and Liu, Ze and Li, Chaofan and Deng, Haoge and Luo, Kun and Zhang, Bo and Zhang, Jiajun and Liu, Dong and Lian, Defu and Wang, Xinlong and Wang, Zhongyuan and Huang, Tiejun and Liu, Zheng},
  title     = {{OmniGen2}: Towards Instruction-Aligned Multimodal Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21964--21975},
}
VLM-Loc: Localization in Point Cloud Maps via Vision-Language Models: Shuhao Kang,

Youqi Liao,

Peijie Wang,

Wenlong Liao,

Qilin Zhang,

Benjamin Busam,

Xieyuanli Chen,

Yun Liu; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Kang_2026_CVPR,
  author    = {Kang, Shuhao and Liao, Youqi and Wang, Peijie and Liao, Wenlong and Zhang, Qilin and Busam, Benjamin and Chen, Xieyuanli and Liu, Yun},
  title     = {{VLM-Loc}: Localization in Point Cloud Maps via Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41365--41375},
}
MSJoE: Jointly Evolving MLLM and Sampler for Efficient Long-Form Video Understanding: Wenhui Tan,

Xiaoyi Yu,

Jiaze Li,

Yijing Chen,

Jianzhong Ju,

Zhenbo Luo,

Ruihua Song,

Jian Luan; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Tan_2026_CVPR,
  author    = {Tan, Wenhui and Yu, Xiaoyi and Li, Jiaze and Chen, Yijing and Ju, Jianzhong and Luo, Zhenbo and Song, Ruihua and Luan, Jian},
  title     = {{MSJoE}: Jointly Evolving {MLLM} and Sampler for Efficient Long-Form Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19487--19496},
}
FloodDiffusion: Tailored Diffusion Forcing for Streaming Motion Generation: Yiyi Cai,

Yuhan Wu,

Kunhang Li,

You Zhou,

Bo Zheng,

Haiyang Liu; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Cai_2026_CVPR,
  author    = {Cai, Yiyi and Wu, Yuhan and Li, Kunhang and Zhou, You and Zheng, Bo and Liu, Haiyang},
  title     = {{FloodDiffusion}: Tailored Diffusion Forcing for Streaming Motion Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2295--2304},
}
Edge-Focused Super-Resolution for Omnidirectional Images with Spherical Geometric Augmentation: Shaolin Wang,

Yuying Li,

Lei Zhong,

Shigang Li,

Jianfeng Li; [pdf]
[bibtex]
@inproceedings{Wang_2026_CVPR,
  author    = {Wang, Shaolin and Li, Yuying and Zhong, Lei and Li, Shigang and Li, Jianfeng},
  title     = {Edge-Focused Super-Resolution for Omnidirectional Images with Spherical Geometric Augmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38198--38207},
}
From Panel to Pixel: Zoom-In Vision-Language Pretraining from Biomedical Scientific Literature: Kun Yuan,

Min Sun,

Zhen Chen,

Alejandro Lozano,

Xiangteng He,

Shi Li,

Nassir Navab,

Xiaoxiao Sun,

Nicolas Padoy,

Serena Yeung-Levy; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Yuan_2026_CVPR,
  author    = {Yuan, Kun and Sun, Min and Chen, Zhen and Lozano, Alejandro and He, Xiangteng and Li, Shi and Navab, Nassir and Sun, Xiaoxiao and Padoy, Nicolas and Yeung-Levy, Serena},
  title     = {From Panel to Pixel: Zoom-In Vision-Language Pretraining from Biomedical Scientific Literature},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42649--42658},
}
PQDT: Pseudo-Query Dual Transformer for Robust Point Cloud Restoration: Haoqing Wu,

Alexa Nawotki,

Jochen Garcke; [pdf] [supp]
[bibtex]
@inproceedings{Wu_2026_CVPR,
  author    = {Wu, Haoqing and Nawotki, Alexa and Garcke, Jochen},
  title     = {{PQDT}: Pseudo-Query Dual Transformer for Robust Point Cloud Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24195--24205},
}
Love Me, Love My Label: Rethinking the Role of Labels in Prompt Retrieval for Visual In-Context Learning: Tianci Luo,

Haohao Pan,

Jinpeng Wang,

Niu Lian,

Xinrui Chen,

Bin Chen,

Shu-Tao Xia,

Chun Yuan; [pdf] [arXiv]
[bibtex]
@inproceedings{Luo_2026_CVPR,
  author    = {Luo, Tianci and Pan, Haohao and Wang, Jinpeng and Lian, Niu and Chen, Xinrui and Chen, Bin and Xia, Shu-Tao and Yuan, Chun},
  title     = {Love Me, Love My Label: Rethinking the Role of Labels in Prompt Retrieval for Visual In-Context Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38774--38783},
}
MeshRipple: Structured Autoregressive Generation of Artist-Meshes: Junkai Lin,

Hang Long,

Huipeng Guo,

Jielei Zhang,

Jiayi Yang,

Tianle Guo,

Yang Yang,

Jianwen Li,

Wenxiao ZHANG,

Matthias Nießner,

Wei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Junkai and Long, Hang and Guo, Huipeng and Zhang, Jielei and Yang, Jiayi and Guo, Tianle and Yang, Yang and Li, Jianwen and ZHANG, Wenxiao and Nie{\ss}ner, Matthias and Yang, Wei},
  title     = {{MeshRipple}: Structured Autoregressive Generation of Artist-Meshes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12706--12718},
}
OralGPT-Omni: A Versatile Dental Multimodal Large Language Model: Jing Hao,

Yuci Liang,

Lizhuo Lin,

Yuxuan Fan,

Wenkai Zhou,

Kaixin Guo,

Zanting Ye,

Yanpeng Sun,

Xinyu Zhang,

Yanqi Yang,

Qiankun Li,

Hao Tang,

James Kit-Hon Tsoi,

Linlin Shen,

Kuo Feng Hung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hao_2026_CVPR,
  author    = {Hao, Jing and Liang, Yuci and Lin, Lizhuo and Fan, Yuxuan and Zhou, Wenkai and Guo, Kaixin and Ye, Zanting and Sun, Yanpeng and Zhang, Xinyu and Yang, Yanqi and Li, Qiankun and Tang, Hao and Tsoi, James Kit-Hon and Shen, Linlin and Hung, Kuo Feng},
  title     = {{OralGPT-Omni}: A Versatile Dental Multimodal Large Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38509--38519},
}
VisualAD: Language-Free Zero-Shot Anomaly Detection via Vision Transformer: Yanning Hou,

Peiyuan Li,

Zirui Liu,

Yitong Wang,

Yanran Ruan,

Jianfeng Qiu,

Ke Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hou_2026_CVPR,
  author    = {Hou, Yanning and Li, Peiyuan and Liu, Zirui and Wang, Yitong and Ruan, Yanran and Qiu, Jianfeng and Xu, Ke},
  title     = {{VisualAD}: Language-Free Zero-Shot Anomaly Detection via Vision Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21346--21356},
}
Adaptive Spectral Feature Forecasting for Diffusion Sampling Acceleration: Jiaqi Han,

Juntong Shi,

Puheng Li,

Haotian Ye,

Qiushan Guo,

Stefano Ermon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Jiaqi and Shi, Juntong and Li, Puheng and Ye, Haotian and Guo, Qiushan and Ermon, Stefano},
  title     = {Adaptive Spectral Feature Forecasting for Diffusion Sampling Acceleration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43320--43330},
}
Progressive Neural Architecture Generation: Caiyang Yu,

Chen Huang,

Yun Liu,

Chenwei Tang,

Wei Ju,

Jiancheng Lv; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Caiyang and Huang, Chen and Liu, Yun and Tang, Chenwei and Ju, Wei and Lv, Jiancheng},
  title     = {Progressive Neural Architecture Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34524--34534},
}
Shape-of-You: Fused Gromov-Wasserstein Optimal Transport for Semantic Correspondence in-the-Wild: Jiin Im,

Sisung Liu,

Je Hyeong Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Im_2026_CVPR,
  author    = {Im, Jiin and Liu, Sisung and Hong, Je Hyeong},
  title     = {{Shape-of-You}: Fused {Gromov-Wasserstein} Optimal Transport for Semantic Correspondence in-the-Wild},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27730--27739},
}
TRIDENT: A Trimodal Cascade Generative Framework for Drug and RNA-Conditioned Cellular Morphology Synthesis: Rui Peng,

Ziru Liu,

Lingyuan Ye,

Yuxing Lu,

Boxin Shi,

Jinzhuo Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Rui and Liu, Ziru and Ye, Lingyuan and Lu, Yuxing and Shi, Boxin and Wang, Jinzhuo},
  title     = {{TRIDENT}: A Trimodal Cascade Generative Framework for Drug and {RNA}-Conditioned Cellular Morphology Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26803--26812},
}
EDGS: Eliminating Densification for Efficient Convergence of 3DGS: Dmytro Kotovenko,

Olga Grebenkova,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kotovenko_2026_CVPR,
  author    = {Kotovenko, Dmytro and Grebenkova, Olga and Ommer, Bj{\"o}rn},
  title     = {{EDGS}: Eliminating Densification for Efficient Convergence of {3DGS}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41065--41076},
}
BiGMINT: Biologically-guided Hierarchical Multimodal Integration for Modeling Multiple Compound Activities in Drug Discovery: Pushpak Pati,

Bo Li,

Abbas Rayabat Khan,

Tomé Albuquerque,

Steffen Jaensch,

Amina Mollaysa,

Walid M. Abdelmoula,

Samantha J. Allen,

Joke Reumers,

Helai P. Mohammad,

Scott Oloff,

Tommaso Mansi,

Rui Liao,

Dmytro S. Lituiev,

Zhoubing Xu; [pdf] [supp]
[bibtex]
@InProceedings{Pati_2026_CVPR,
  author    = {Pati, Pushpak and Li, Bo and Khan, Abbas Rayabat and Albuquerque, Tom{\'e} and Jaensch, Steffen and Mollaysa, Amina and Abdelmoula, Walid M. and Allen, Samantha J. and Reumers, Joke and Mohammad, Helai P. and Oloff, Scott and Mansi, Tommaso and Liao, Rui and Lituiev, Dmytro S. and Xu, Zhoubing},
  title     = {{BiGMINT}: Biologically-guided Hierarchical Multimodal Integration for Modeling Multiple Compound Activities in Drug Discovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6982--6993},
}
Beyond the Static-World: Lifelong Learning for All-in-One Medical Image Restoration: Shihao Shan,

Hongying Liu,

Fanhua Shang,

Liang Wan,

Jingjing Deng; [pdf]
[bibtex]
@InProceedings{Shan_2026_CVPR,
  author    = {Shan, Shihao and Liu, Hongying and Shang, Fanhua and Wan, Liang and Deng, Jingjing},
  title     = {Beyond the Static-World: Lifelong Learning for All-in-One Medical Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13702--13711},
}
When Numbers Speak: Aligning Textual Numerals and Visual Instances in Text-to-Video Diffusion Models: Zhengyang Sun,

Yu Chen,

Xin Zhou,

Xiaofan Li,

Xiwu Chen,

Dingkang Liang,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Zhengyang and Chen, Yu and Zhou, Xin and Li, Xiaofan and Chen, Xiwu and Liang, Dingkang and Bai, Xiang},
  title     = {When Numbers Speak: Aligning Textual Numerals and Visual Instances in Text-to-Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24268--24278},
}
Text-Image Conditioned 3D Generation: Jiazhong Cen,

Jiemin Fang,

Sikuang Li,

Guanjun Wu,

Chen Yang,

Taoran Yi,

Zanwei Zhou,

Zhikuan Bao,

Lingxi Xie,

Wei Shen,

Qi Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cen_2026_CVPR,
  author    = {Cen, Jiazhong and Fang, Jiemin and Li, Sikuang and Wu, Guanjun and Yang, Chen and Yi, Taoran and Zhou, Zanwei and Bao, Zhikuan and Xie, Lingxi and Shen, Wei and Tian, Qi},
  title     = {Text-Image Conditioned {3D} Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {604--614},
}
Soft Modality-Guided Expert Specialization in MoE-VLMs: Zi-Hao Bo,

Yaqian Li,

Anzhou Hou,

Rinyoichi Takezoe,

Ertao Zhao,

Tianxiang Pan,

Jiale Yan,

Mo Guang,

Kaiwen Long; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bo_2026_CVPR,
  author    = {Bo, Zi-Hao and Li, Yaqian and Hou, Anzhou and Takezoe, Rinyoichi and Zhao, Ertao and Pan, Tianxiang and Yan, Jiale and Guang, Mo and Long, Kaiwen},
  title     = {Soft Modality-Guided Expert Specialization in {MoE-VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24330--24340},
}
Critical Patch-Aware Sparse Prompting with Decoupled Training for Continual Learning on the Edge: Wonseon Lim,

Jaesung Lee,

Dae-Won Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lim_2026_CVPR,
  author    = {Lim, Wonseon and Lee, Jaesung and Kim, Dae-Won},
  title     = {Critical Patch-Aware Sparse Prompting with Decoupled Training for Continual Learning on the Edge},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17989--17998},
}
CubeComposer: Spatio-Temporal Autoregressive 4K 360° Video Generation from Perspective Video: Lingen Li,

Guangzhi Wang,

Xiaoyu Li,

Zhaoyang Zhang,

Qi Dou,

Jinwei Gu,

Tianfan Xue,

Ying Shan; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Lingen and Wang, Guangzhi and Li, Xiaoyu and Zhang, Zhaoyang and Dou, Qi and Gu, Jinwei and Xue, Tianfan and Shan, Ying},
  title     = {{CubeComposer}: Spatio-Temporal Autoregressive {4K} {360$^\circ$} Video Generation from Perspective Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32625--32635},
}
Failure Modes for Deep Learning-Based Online Mapping: How to Measure and Address Them: Michael Hubbertz,

Qi Han,

Tobias Meisen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hubbertz_2026_CVPR,
  author    = {Hubbertz, Michael and Han, Qi and Meisen, Tobias},
  title     = {Failure Modes for Deep Learning-Based Online Mapping: How to Measure and Address Them},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39755--39764},
}
SV-GS: Sparse View 4D Reconstruction with Skeleton-Driven Gaussian Splatting: Jun-Jee Chao,

Volkan Isler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chao_2026_CVPR,
  author    = {Chao, Jun-Jee and Isler, Volkan},
  title     = {{SV-GS}: Sparse View {4D} Reconstruction with Skeleton-Driven {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5027--5037},
}
Streamlined Open-Vocabulary Human-Object Interaction Detection: Chang Sun,

Dongliang Liao,

Changxing Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Chang and Liao, Dongliang and Ding, Changxing},
  title     = {Streamlined Open-Vocabulary Human-Object Interaction Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20324--20333},
}
Think 360°: Beyond Depth: Evaluating the Width-centric Reasoning Capability of MLLMs: Mingrui Chen,

Hexiong Yang,

Haogeng Liu,

Huaibo Huang,

Ran He; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Mingrui and Yang, Hexiong and Liu, Haogeng and Huang, Huaibo and He, Ran},
  title     = {Think {360$^\circ$}: Beyond Depth: Evaluating the Width-centric Reasoning Capability of {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5211--5220},
}
Multi-Prototype Compactness and Boundary-Aware Synthesis for Unsupervised Anomaly Detection: Kailun Liao,

Jianfeng Yang,

Tao Tao,

Wenfei Wu,

Jiaming Jiang,

Jinsheng Xiao; [pdf]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Kailun and Yang, Jianfeng and Tao, Tao and Wu, Wenfei and Jiang, Jiaming and Xiao, Jinsheng},
  title     = {Multi-Prototype Compactness and Boundary-Aware Synthesis for Unsupervised Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28524--28533},
}
ORBIT: Benchmarking SfM in the Wild with 360° Video: Sara Sabour,

Richard Tucker,

Marcus Brubaker,

Saurabh Saxena,

Junhwa Hur,

Andrea Tagliasacchi,

Deqing Sun,

David J. Fleet,

Richard Szeliski,

Noah Snavely; [pdf] [supp]
[bibtex]
@InProceedings{Sabour_2026_CVPR,
  author    = {Sabour, Sara and Tucker, Richard and Brubaker, Marcus and Saxena, Saurabh and Hur, Junhwa and Tagliasacchi, Andrea and Sun, Deqing and Fleet, David J. and Szeliski, Richard and Snavely, Noah},
  title     = {{ORBIT}: Benchmarking {SfM} in the Wild with {360$^\circ$} Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6791--6801},
}
TR2M: Transferring Monocular Relative Depth to Metric Depth with Language Descriptions and Dual-Level Scale-Oriented Contrast: Beilei Cui,

Yiming Huang,

Long Bai,

Hongliang Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Beilei and Huang, Yiming and Bai, Long and Ren, Hongliang},
  title     = {{TR2M}: Transferring Monocular Relative Depth to Metric Depth with Language Descriptions and Dual-Level Scale-Oriented Contrast},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34181--34192},
}
VideoNet: A Large-Scale Dataset for Domain-Specific Action Recognition: Tanush Yadav,

Mohammadreza Salehi,

Jae Sung Park,

Vivek Ramanujan,

Hannaneh Hajishirzi,

Yejin Choi,

Ali Farhadi,

Rohun Tripathi,

Ranjay Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yadav_2026_CVPR,
  author    = {Yadav, Tanush and Salehi, Mohammadreza and Park, Jae Sung and Ramanujan, Vivek and Hajishirzi, Hannaneh and Choi, Yejin and Farhadi, Ali and Tripathi, Rohun and Krishna, Ranjay},
  title     = {{VideoNet}: A Large-Scale Dataset for Domain-Specific Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12881--12891},
}
DynBridge: Bridging Imagination and Control through Interaction Dynamics for Robot Manipulation: Alex Wang,

Zhiwei Dong,

Qicheng Bai,

Chenshi Zhang,

Yujie Yi,

Guang Dai,

Yong Liu,

Mengmeng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Alex and Dong, Zhiwei and Bai, Qicheng and Zhang, Chenshi and Yi, Yujie and Dai, Guang and Liu, Yong and Wang, Mengmeng},
  title     = {{DynBridge}: Bridging Imagination and Control through Interaction Dynamics for Robot Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22423--22432},
}
Deeper Thought, Weaker Aim: Understanding and Mitigating Perceptual Impairment during Reasoning in Multimodal Large Language Models: Ruiying Peng,

Xueyu Wu,

Jing Lei,

Lu Hou,

Yuanzheng Ma,

Xiao-Hui Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Ruiying and Wu, Xueyu and Lei, Jing and Hou, Lu and Ma, Yuanzheng and Li, Xiao-Hui},
  title     = {Deeper Thought, Weaker Aim: Understanding and Mitigating Perceptual Impairment during Reasoning in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12064--12073},
}
BulletTime: Decoupled Control of Time and Camera Pose for Video Generation: Yiming Wang,

Qihang Zhang,

Shengqu Cai,

Tong Wu,

Jan Ackermann,

Zhengfei Kuang,

Yang Zheng,

Frano Rajič,

Siyu Tang,

Gordon Wetzstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yiming and Zhang, Qihang and Cai, Shengqu and Wu, Tong and Ackermann, Jan and Kuang, Zhengfei and Zheng, Yang and Raji{\v{c}}, Frano and Tang, Siyu and Wetzstein, Gordon},
  title     = {{BulletTime}: Decoupled Control of Time and Camera Pose for Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18319--18330},
}
Human Interaction-Aware 3D Reconstruction from a Single Image: Gwanghyun Kim,

Junghun James Kim,

Suh Yoon Jeon,

Jason Park,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Gwanghyun and Kim, Junghun James and Jeon, Suh Yoon and Park, Jason and Chun, Se Young},
  title     = {Human Interaction-Aware {3D} Reconstruction from a Single Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21252--21261},
}
Adaptive Action Chunking at Inference-time for Vision-Language-Action Models: Yuanchang Liang,

Xiaobo Wang,

Kai Wang,

Shuo Wang,

Xiaojiang Peng,

Haoyu Chen,

David Kim Huat Chua,

Prahlad Vadakkepat; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Yuanchang and Wang, Xiaobo and Wang, Kai and Wang, Shuo and Peng, Xiaojiang and Chen, Haoyu and Chua, David Kim Huat and Vadakkepat, Prahlad},
  title     = {Adaptive Action Chunking at Inference-time for Vision-Language-Action Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20802--20811},
}
ReManNet: A Riemannian Manifold Network for Monocular 3D Lane Detection: Chengzhi Hong,

Bijun Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Chengzhi and Li, Bijun},
  title     = {{ReManNet}: A {Riemannian} Manifold Network for Monocular {3D} Lane Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33046--33056},
}
MA-Bench: Towards Fine-grained Micro-Action Understanding: Kun Li,

Jihao Gu,

Fei Wang,

Zhiliang Wu,

Hehe Fan,

Dan Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Kun and Gu, Jihao and Wang, Fei and Wu, Zhiliang and Fan, Hehe and Guo, Dan},
  title     = {{MA-Bench}: Towards Fine-grained Micro-Action Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20118--20128},
}
Beyond Layer-Wise Merging: Chain-of-Merging for Vision-Language Models: Xinyu Zhang,

Yuxuan Dong,

Lingling Zhang,

Chengyou Jia,

ZhuoHang Dang,

Yixing Yao,

Yaqiang Wu,

Basura Fernando,

Jun Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xinyu and Dong, Yuxuan and Zhang, Lingling and Jia, Chengyou and Dang, ZhuoHang and Yao, Yixing and Wu, Yaqiang and Fernando, Basura and Liu, Jun},
  title     = {Beyond Layer-Wise Merging: {Chain-of-Merging} for Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24279--24289},
}
Synthetic Object Compositions for Scalable and Accurate Learning in Detection, Segmentation, and Grounding: Weikai Huang,

Jieyu Zhang,

Taoyang Jia,

Chenhao Zheng,

Ziqi Gao,

Jae Sung Park,

Ranjay Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Weikai and Zhang, Jieyu and Jia, Taoyang and Zheng, Chenhao and Gao, Ziqi and Park, Jae Sung and Krishna, Ranjay},
  title     = {Synthetic Object Compositions for Scalable and Accurate Learning in Detection, Segmentation, and Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6388--6398},
}
WeaveTime: Streaming from Earlier Frames into Emergent Memory in VideoLLMs: Yulin Zhang,

Cheng Shi,

Sibei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yulin and Shi, Cheng and Yang, Sibei},
  title     = {{WeaveTime}: Streaming from Earlier Frames into Emergent Memory in {VideoLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16920--16932},
}
Towards Generalized Multimodal Homography Estimation: Jinkun You,

Jiaxin Cheng,

Jie Zhang,

Yicong Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2026_CVPR,
  author    = {You, Jinkun and Cheng, Jiaxin and Zhang, Jie and Zhou, Yicong},
  title     = {Towards Generalized Multimodal Homography Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8408--8417},
}
Self-Attention Driven Tensor Representation for High-Order Data Recovery: Zhi-Wei Shi,

Yu-Bang Zheng,

Heng-Chao Li; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Zhi-Wei and Zheng, Yu-Bang and Li, Heng-Chao},
  title     = {Self-Attention Driven Tensor Representation for High-Order Data Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26399--26408},
}
FLARE: A Failure-Aware Framework for Autonomous Correction and Recovery in Visual-Language Robotic Manipulation: Ganlong Zhao,

Zijia Tang,

Xingping Chen,

Zhanghui Kuang,

Ye Tian,

Guanbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Ganlong and Tang, Zijia and Chen, Xingping and Kuang, Zhanghui and Tian, Ye and Li, Guanbin},
  title     = {{FLARE}: A Failure-Aware Framework for Autonomous Correction and Recovery in Visual-Language Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22391--22401},
}
SMV-EAR: Bring Spatiotemporal Multi-View Representation Learning into Efficient Event-Based Action Recognition: Rui Fan,

Weidong Hao,

Juntao Guan,

Lai Rui,

Tong Wu,

Fanhong Zeng,

Lin Gu; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Rui and Hao, Weidong and Guan, Juntao and Rui, Lai and Wu, Tong and Zeng, Fanhong and Gu, Lin},
  title     = {{SMV-EAR}: Bring Spatiotemporal Multi-View Representation Learning into Efficient Event-Based Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6043--6053},
}
Render-to-Adapt: Unsupervised Personal Adaptation for Gaze Estimation: Yangshi Ge,

Zheng Liu,

Feng Lu; [pdf]
[bibtex]
@InProceedings{Ge_2026_CVPR,
  author    = {Ge, Yangshi and Liu, Zheng and Lu, Feng},
  title     = {{Render-to-Adapt}: Unsupervised Personal Adaptation for Gaze Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3101--3110},
}
VideoAuto-R1: Video Auto Reasoning via Thinking Once, Answering Twice: Shuming Liu,

Mingchen Zhuge,

Changsheng Zhao,

Jun Chen,

Lemeng Wu,

Zechun Liu,

Chenchen Zhu,

Zhipeng Cai,

Chong Zhou,

Haozhe Liu,

Ernie Chang,

Saksham Suri,

Hongyu Xu,

Qi Qian,

Wei Wen,

Balakrishnan Varadarajan,

Zhuang Liu,

Hu Xu,

Florian Bordes,

Raghuraman Krishnamoorthi,

Bernard Ghanem,

Vikas Chandra,

Yunyang Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Shuming and Zhuge, Mingchen and Zhao, Changsheng and Chen, Jun and Wu, Lemeng and Liu, Zechun and Zhu, Chenchen and Cai, Zhipeng and Zhou, Chong and Liu, Haozhe and Chang, Ernie and Suri, Saksham and Xu, Hongyu and Qian, Qi and Wen, Wei and Varadarajan, Balakrishnan and Liu, Zhuang and Xu, Hu and Bordes, Florian and Krishnamoorthi, Raghuraman and Ghanem, Bernard and Chandra, Vikas and Xiong, Yunyang},
  title     = {{VideoAuto-R1}: Video Auto Reasoning via Thinking Once, Answering Twice},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32829--32839},
}
Taming the Long Tail: Rebalancing Adversarial Training via Adaptive Perturbation: Lilin Zhang,

Yimo Guo,

Yue Li,

Jiancheng Shi,

Xianggen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Lilin and Guo, Yimo and Li, Yue and Shi, Jiancheng and Liu, Xianggen},
  title     = {Taming the Long Tail: Rebalancing Adversarial Training via Adaptive Perturbation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34900--34907},
}
ReFAct: Empowering Multimodal Web Agents with Visual and Context Focusing: Rui Wu,

Shuo Zhang,

Xiaoxuan Tang,

Ruirui Zhang,

Yi Liu,

Tao Jiang,

Wenhao Xu,

Yong Li; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Rui and Zhang, Shuo and Tang, Xiaoxuan and Zhang, Ruirui and Liu, Yi and Jiang, Tao and Xu, Wenhao and Li, Yong},
  title     = {{ReFAct}: Empowering Multimodal Web Agents with Visual and Context Focusing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14990--14999},
}
Parallax to Align Them All: An OmniParallax Attention Mechanism for Distributed Multi-View Image Compression: Haotian Zhang,

Feiyue Long,

Yixin Yu,

Jian Xue,

Haocheng Tang,

Tongda Xu,

Zhenning Shi,

Yan Wang,

Siwei Ma,

Jiaqi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Haotian and Long, Feiyue and Yu, Yixin and Xue, Jian and Tang, Haocheng and Xu, Tongda and Shi, Zhenning and Wang, Yan and Ma, Siwei and Zhang, Jiaqi},
  title     = {Parallax to Align Them All: An {OmniParallax} Attention Mechanism for Distributed Multi-View Image Compression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41322--41331},
}
SharpTimeGS: Sharp and Stable Dynamic Gaussian Splatting via Lifespan Modulation: Zhanfeng Liao,

Jiajun Zhang,

Hanzhang Tu,

Zhixi Wang,

Yunqi Gao,

Hongwen Zhang,

Yebin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Zhanfeng and Zhang, Jiajun and Tu, Hanzhang and Wang, Zhixi and Gao, Yunqi and Zhang, Hongwen and Liu, Yebin},
  title     = {{SharpTimeGS}: Sharp and Stable Dynamic {Gaussian} Splatting via Lifespan Modulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11798--11807},
}
Extending One-Step Image Generation from Class Labels to Text via Discriminative Text Representation: Chenxi Zhao,

Chen Zhu,

Xiaokun Feng,

Aiming Hao,

Jiashu Zhu,

Jiachen Lei,

Jiahong Wu,

Xiangxiang Chu,

Jufeng Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Chenxi and Zhu, Chen and Feng, Xiaokun and Hao, Aiming and Zhu, Jiashu and Lei, Jiachen and Wu, Jiahong and Chu, Xiangxiang and Yang, Jufeng},
  title     = {Extending One-Step Image Generation from Class Labels to Text via Discriminative Text Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36649--36659},
}
VidPrism: Heterogeneous Mixture of Experts for Image-to-Video Transfer: Rui Lin,

Chuanming Wang,

Huadong Ma; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Rui and Wang, Chuanming and Ma, Huadong},
  title     = {{VidPrism}: Heterogeneous Mixture of Experts for Image-to-Video Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31230--31239},
}
GeoAgent: Learning to Geolocate Everywhere with Reinforced Geographic Characteristics: Modi Jin,

Yiming Zhang,

Boyuan Sun,

Dingwen Zhang,

Ming-Ming Cheng,

Qibin Hou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
  author    = {Jin, Modi and Zhang, Yiming and Sun, Boyuan and Zhang, Dingwen and Cheng, Ming-Ming and Hou, Qibin},
  title     = {{GeoAgent}: Learning to Geolocate Everywhere with Reinforced Geographic Characteristics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41352--41364},
}
ViHOI: Human-Object Interaction Synthesis with Visual Priors: Songjin Cai,

Linjie Zhong,

Ling Guo,

Changxing Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Songjin and Zhong, Linjie and Guo, Ling and Ding, Changxing},
  title     = {{ViHOI}: Human-Object Interaction Synthesis with Visual Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30686--30695},
}
FILTR: Extracting Topological Features from Pretrained 3D Models: Louis Martinez,

Maks Ovsjanikov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Martinez_2026_CVPR,
  author    = {Martinez, Louis and Ovsjanikov, Maks},
  title     = {{FILTR}: Extracting Topological Features from Pretrained {3D} Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36179--36189},
}
MSGNav: Unleashing the Power of Multi-modal 3D Scene Graph for Zero-Shot Embodied Navigation: Xun Huang,

Shijia Zhao,

Yunxiang Wang,

Xin Lu,

Wanfa Zhang,

Rongsheng Qu,

Weixin Li,

Yunhong Wang,

Chenglu Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Xun and Zhao, Shijia and Wang, Yunxiang and Lu, Xin and Zhang, Wanfa and Qu, Rongsheng and Li, Weixin and Wang, Yunhong and Wen, Chenglu},
  title     = {{MSGNav}: Unleashing the Power of Multi-modal {3D} Scene Graph for Zero-Shot Embodied Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37154--37163},
}
Breaking the Scalability Limit of Multi-Projector Calibration with Embedded Cameras: Takumi Kawano,

Kohei Miura,

Daisuke Iwai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kawano_2026_CVPR,
  author    = {Kawano, Takumi and Miura, Kohei and Iwai, Daisuke},
  title     = {Breaking the Scalability Limit of Multi-Projector Calibration with Embedded Cameras},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21573--21582},
}
A Combination of Noise and Bilateral Filters Achieve Supralinear and Scalable Adversarial Robustness in CNNs: Nicolas Stalder,

Benjamin F. Grewe,

Matteo Saponati,

Pau Vilimelis Aceituno; [pdf] [supp]
[bibtex]
@InProceedings{Stalder_2026_CVPR,
  author    = {Stalder, Nicolas and Grewe, Benjamin F. and Saponati, Matteo and Aceituno, Pau Vilimelis},
  title     = {A Combination of Noise and Bilateral Filters Achieve Supralinear and Scalable Adversarial Robustness in {CNNs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6559--6568},
}
Distilling Quasi-Conformal Mapping: A Generalizable and Efficient Solution for Wide-Angle Correction: Chengyang Liu,

Zixuan Lin,

Miaolin Han,

Michael K. Ng,

Huibin Li; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Chengyang and Lin, Zixuan and Han, Miaolin and Ng, Michael K. and Li, Huibin},
  title     = {Distilling Quasi-Conformal Mapping: A Generalizable and Efficient Solution for Wide-Angle Correction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19686--19695},
}
UNICBench: UNIfied Counting Benchmark for MLLM: Chenggang Rong,

Tao Han,

Zhiyuan Zhao,

Yaowu Fan,

Jia Wan,

Song Guo,

Yuan Yuan,

Junyu Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rong_2026_CVPR,
  author    = {Rong, Chenggang and Han, Tao and Zhao, Zhiyuan and Fan, Yaowu and Wan, Jia and Guo, Song and Yuan, Yuan and Gao, Junyu},
  title     = {{UNICBench}: {UNIfied} Counting Benchmark for {MLLM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23730--23740},
}
Subspace Alignment for CLIP-based Continual Learning via Canonical Correlation Analysis: Huan Zhang,

Shuyu Dong,

Yujin Zheng,

Dingwen Wang,

Shenghua Fan,

Fan Lyu; [pdf]
[bibtex]
% NOTE(review): key Zhang_2026_CVPR is shared by several entries in this listing; disambiguate before citing.
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Huan and Dong, Shuyu and Zheng, Yujin and Wang, Dingwen and Fan, Shenghua and Lyu, Fan},
  title     = {Subspace Alignment for {CLIP}-based Continual Learning via Canonical Correlation Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32257--32266},
}
CGL: Advancing Continual GUI Learning via Reinforcement Fine-Tuning: Zhenquan Yao,

Zitong Huang,

Yihan Zeng,

Jianhua Han,

Hang Xu,

Chun-Mei Feng,

Jianwei Ma,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2026_CVPR,
  author    = {Yao, Zhenquan and Huang, Zitong and Zeng, Yihan and Han, Jianhua and Xu, Hang and Feng, Chun-Mei and Ma, Jianwei and Zuo, Wangmeng},
  title     = {{CGL}: Advancing Continual {GUI} Learning via Reinforcement Fine-Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15212--15221},
}
DVGT: Driving Visual Geometry Transformer: Sicheng Zuo,

Zixun Xie,

Wenzhao Zheng,

Shaoqing Xu,

Fang Li,

Shengyin Jiang,

Long Chen,

Zhi-Xin Yang,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zuo_2026_CVPR,
  author    = {Zuo, Sicheng and Xie, Zixun and Zheng, Wenzhao and Xu, Shaoqing and Li, Fang and Jiang, Shengyin and Chen, Long and Yang, Zhi-Xin and Lu, Jiwen},
  title     = {{DVGT}: Driving Visual Geometry Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14658--14668},
}
From Feature Learning to Spectral Basis Learning: A Unifying and Flexible Framework for Efficient and Robust Shape Matching: Feifan Luo,

Hongyang Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Feifan and Chen, Hongyang},
  title     = {From Feature Learning to Spectral Basis Learning: A Unifying and Flexible Framework for Efficient and Robust Shape Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31377--31388},
}
Diffusion Forcing Planner: History-Annealed Planning with Time-Dependent Guidance for Autonomous Driving: Zehan Zhang,

Yaoyi Li,

Neng Zhang,

Jia Cai; [pdf] [supp]
[bibtex]
% NOTE(review): key Zhang_2026_CVPR is shared by several entries in this listing; disambiguate before citing.
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zehan and Li, Yaoyi and Zhang, Neng and Cai, Jia},
  title     = {Diffusion Forcing Planner: History-Annealed Planning with Time-Dependent Guidance for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39796--39805},
}
TextFM: Robust Semi-dense Feature Matching with Language Guidance: Zhihao Zheng,

Jinglun Feng,

Nirav Savaliya,

Zheng-Hang Yeh,

Bo Lang,

Mooi Choo Chuah; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Zhihao and Feng, Jinglun and Savaliya, Nirav and Yeh, Zheng-Hang and Lang, Bo and Chuah, Mooi Choo},
  title     = {{TextFM}: Robust Semi-dense Feature Matching with Language Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16635--16644},
}
TC-Pade: Trajectory-Consistent Pade Approximation for Diffusion Acceleration: Shaoxuan He,

Benlei Cui,

Bukun Huang,

Zhizeng Ye,

Yunyun Sun,

Longtao Huang,

Hui Xue,

Yang Yang,

Haiwen Hong,

Jingqun Tang,

Zhou Zhao; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Shaoxuan and Cui, Benlei and Huang, Bukun and Ye, Zhizeng and Sun, Yunyun and Huang, Longtao and Xue, Hui and Yang, Yang and Hong, Haiwen and Tang, Jingqun and Zhao, Zhou},
  title     = {{TC-Pade}: Trajectory-Consistent {Pade} Approximation for Diffusion Acceleration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35768--35778},
}
Gau-Occ: Geometry-Completed Gaussians for Multi-Modal 3D Occupancy Prediction: Chengxin Lv,

Yihui Li,

Hongyu Yang,

YunHong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2026_CVPR,
  author    = {Lv, Chengxin and Li, Yihui and Yang, Hongyu and Wang, YunHong},
  title     = {{Gau-Occ}: Geometry-Completed {Gaussians} for Multi-Modal {3D} Occupancy Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14198--14207},
}
Splatent: Splatting Diffusion Latents for Novel View Synthesis: Or Hirschorn,

Omer Sela,

Inbar Huberman-Spiegelglas,

Netalee Efrat,

Eli Alshan,

Ianir Ideses,

Frederic Devernay,

Yochai Zvik,

Lior Fritz; [pdf] [supp]
[bibtex]
@InProceedings{Hirschorn_2026_CVPR,
  author    = {Hirschorn, Or and Sela, Omer and Huberman-Spiegelglas, Inbar and Efrat, Netalee and Alshan, Eli and Ideses, Ianir and Devernay, Frederic and Zvik, Yochai and Fritz, Lior},
  title     = {Splatent: Splatting Diffusion Latents for Novel View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8319--8330},
}
Franca: Nested Matryoshka Clustering for Scalable Visual Representation Learning: Shashanka Venkataramanan,

Valentinos Pariza,

Mohammadreza Salehi,

Lukas Knobel,

Elias Ramzi,

Spyros Gidaris,

Andrei Bursuc,

Yuki M Asano; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Venkataramanan_2026_CVPR,
  author    = {Venkataramanan, Shashanka and Pariza, Valentinos and Salehi, Mohammadreza and Knobel, Lukas and Ramzi, Elias and Gidaris, Spyros and Bursuc, Andrei and Asano, Yuki M},
  title     = {Franca: Nested Matryoshka Clustering for Scalable Visual Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10533--10544},
}
Towards Open Environments and Instructions: General Vision-Language Navigation via Fast-Slow Interactive Reasoning: Yang Li,

Aming Wu,

Zihao Zhang,

Yahong Han; [pdf] [arXiv]
[bibtex]
% NOTE(review): key Li_2026_CVPR is shared by several entries in this listing; disambiguate before citing.
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yang and Wu, Aming and Zhang, Zihao and Han, Yahong},
  title     = {Towards Open Environments and Instructions: General Vision-Language Navigation via Fast-Slow Interactive Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25184--25192},
}
Red-teaming Retrieval-Augmented Diffusion Models via Poisoning Knowledge Bases: Xinqi Lyu,

Yihao Liu,

Dong Wang,

Bin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Lyu_2026_CVPR,
  author    = {Lyu, Xinqi and Liu, Yihao and Wang, Dong and Xiao, Bin},
  title     = {Red-teaming Retrieval-Augmented Diffusion Models via Poisoning Knowledge Bases},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34961--34970},
}
StreamReady: Learning What to Answer and When in Long Streaming Videos: Shehreen Azad,

Vibhav Vineet,

Yogesh S Rawat; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Azad_2026_CVPR,
  author    = {Azad, Shehreen and Vineet, Vibhav and Rawat, Yogesh S},
  title     = {{StreamReady}: Learning What to Answer and When in Long Streaming Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40494--40504},
}
ExpPortrait: Expressive Portrait Generation via Personalized Representation: Junyi Wang,

Yudong Guo,

Boyang Guo,

Shengming Yang,

Juyong Zhang; [pdf] [arXiv]
[bibtex]
% NOTE(review): key Wang_2026_CVPR is shared by several entries in this listing; disambiguate before citing.
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Junyi and Guo, Yudong and Guo, Boyang and Yang, Shengming and Zhang, Juyong},
  title     = {{ExpPortrait}: Expressive Portrait Generation via Personalized Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18107--18117},
}
Benchmarking Single-Factor Physical Video-to-Audio Generation: Tingle Li,

Siddharth Gururani,

Kevin J. Shih,

Gantavya Bhatt,

Sang-gil Lee,

Zhifeng Kong,

Arushi Goel,

Gopala Anumanchipalli,

Ming-Yu Liu; [pdf] [supp]
[bibtex]
% NOTE(review): key Li_2026_CVPR is shared by several entries in this listing; disambiguate before citing.
@InProceedings{Li_2026_CVPR,
  author    = {Li, Tingle and Gururani, Siddharth and Shih, Kevin J. and Bhatt, Gantavya and Lee, Sang-gil and Kong, Zhifeng and Goel, Arushi and Anumanchipalli, Gopala and Liu, Ming-Yu},
  title     = {Benchmarking Single-Factor Physical Video-to-Audio Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1939--1949},
}
UniSH: Unifying Scene and Human Reconstruction in a Feed-Forward Pass: Mengfei Li,

Peng Li,

Zheng Zhang,

Jiahao Lu,

Chengfeng Zhao,

Wei Xue,

Qifeng Liu,

Sida Peng,

Wenxiao Zhang,

Wenhan Luo,

Yuan Liu,

Yike Guo; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Li_2026_CVPR is shared by several entries in this listing; disambiguate before citing.
@InProceedings{Li_2026_CVPR,
  author    = {Li, Mengfei and Li, Peng and Zhang, Zheng and Lu, Jiahao and Zhao, Chengfeng and Xue, Wei and Liu, Qifeng and Peng, Sida and Zhang, Wenxiao and Luo, Wenhan and Liu, Yuan and Guo, Yike},
  title     = {{UniSH}: Unifying Scene and Human Reconstruction in a Feed-Forward Pass},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14038--14049},
}
V-Attack: Targeting Disentangled Value Features for Controllable Adversarial Attacks on LVLMs: Sen Nie,

Jie Zhang,

Jianxin Yan,

Shiguang Shan,

Xilin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nie_2026_CVPR,
  author    = {Nie, Sen and Zhang, Jie and Yan, Jianxin and Shan, Shiguang and Chen, Xilin},
  title     = {{V-Attack}: Targeting Disentangled Value Features for Controllable Adversarial Attacks on {LVLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42257--42267},
}
SpaceMind: Camera-Guided Modality Fusion for Spatial Reasoning in Vision-Language Models: Ruosen Zhao,

Zhikang Zhang,

Jialei Xu,

Jiahao Chang,

Dong Chen,

Lingyun Li,

Weijian Sun,

Zizhuang Wei; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zhao_2026_CVPR is shared by another entry in this listing; disambiguate before citing.
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Ruosen and Zhang, Zhikang and Xu, Jialei and Chang, Jiahao and Chen, Dong and Li, Lingyun and Sun, Weijian and Wei, Zizhuang},
  title     = {{SpaceMind}: Camera-Guided Modality Fusion for Spatial Reasoning in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16811--16822},
}
Concept-Guided Fine-Tuning: Steering ViTs away from Spurious Correlations to Improve Robustness: Yehonatan Elisha,

Oren Barkan,

Noam Koenigstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Elisha_2026_CVPR,
  author    = {Elisha, Yehonatan and Barkan, Oren and Koenigstein, Noam},
  title     = {Concept-Guided Fine-Tuning: Steering {ViTs} away from Spurious Correlations to Improve Robustness},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17035--17045},
}
3D Space as a Scratchpad for Editable Text-to-Image Generation: Oindrila Saha,

Vojtech Krs,

Radomir Mech,

Subhransu Maji,

Matheus Gadelha,

Kevin Blackburn-Matzen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saha_2026_CVPR,
  author    = {Saha, Oindrila and Krs, Vojtech and Mech, Radomir and Maji, Subhransu and Gadelha, Matheus and Blackburn-Matzen, Kevin},
  title     = {{3D} Space as a Scratchpad for Editable Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29233--29243},
}
Confusion-Aware Spectral Regularizer for Long-Tailed Recognition: Ziquan Zhu,

Gaojie Jin,

Hanruo Zhu,

Si-Yuan Lu,

Yunxiao Zhang,

Zeyu Fu,

Ronghui Mu,

Guoqiang Zhang,

Zhao Sun,

Yuhang Xia,

Jiaxing Shang,

Xiang Li,

Lu Liu,

Tianjin Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Ziquan and Jin, Gaojie and Zhu, Hanruo and Lu, Si-Yuan and Zhang, Yunxiao and Fu, Zeyu and Mu, Ronghui and Zhang, Guoqiang and Sun, Zhao and Xia, Yuhang and Shang, Jiaxing and Li, Xiang and Liu, Lu and Huang, Tianjin},
  title     = {Confusion-Aware Spectral Regularizer for Long-Tailed Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28712--28722},
}
SRPO: Self-Referential Policy Optimization for Vision-Language-Action Models: Senyu Fei,

Siyin Wang,

Li Ji,

Ao Li,

Shiduo Zhang,

Liming Liu,

Jinlong Hou,

Jingjing Gong,

Xianzhong Zhao,

Xipeng Qiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fei_2026_CVPR,
  author    = {Fei, Senyu and Wang, Siyin and Ji, Li and Li, Ao and Zhang, Shiduo and Liu, Liming and Hou, Jinlong and Gong, Jingjing and Zhao, Xianzhong and Qiu, Xipeng},
  title     = {{SRPO}: Self-Referential Policy Optimization for Vision-Language-Action Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6718--6728},
}
FlashVGGT: Efficient and Scalable Visual Geometry Transformers with Compressed Descriptor Attention: Zipeng Wang,

Dan Xu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wang_2026_CVPR is shared by several entries in this listing; disambiguate before citing.
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zipeng and Xu, Dan},
  title     = {{FlashVGGT}: Efficient and Scalable Visual Geometry Transformers with Compressed Descriptor Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21826--21835},
}
Differences That Matter: Auditing Models for Capability Gap Discovery and Rectification: Qihao Liu,

Chengzhi Mao,

Yaojie Liu,

Alan Yuille,

Wen-Sheng Chu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Liu_2026_CVPR is shared by several entries in this listing; disambiguate before citing.
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Qihao and Mao, Chengzhi and Liu, Yaojie and Yuille, Alan and Chu, Wen-Sheng},
  title     = {Differences That Matter: Auditing Models for Capability Gap Discovery and Rectification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1639--1650},
}
Semantic Noise Reduction via Teacher-Guided Dual-Path Audio-Visual Representation Learning: Linge Wang,

Yingying Chen,

Bingke Zhu,

Lu Zhou,

Jinqiao Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wang_2026_CVPR is shared by several entries in this listing; disambiguate before citing.
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Linge and Chen, Yingying and Zhu, Bingke and Zhou, Lu and Wang, Jinqiao},
  title     = {Semantic Noise Reduction via Teacher-Guided Dual-Path Audio-Visual Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32005--32014},
}
The Geometry of Robustness: Optimizing Loss Landscape Curvature and Feature Manifold Alignment for Robust Finetuning of Vision-Language Models: Shivang Chopra,

Shaunak Halbe,

Chengyue Huang,

Brisa Maneechotesuwan,

Zsolt Kira; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chopra_2026_CVPR,
  author    = {Chopra, Shivang and Halbe, Shaunak and Huang, Chengyue and Maneechotesuwan, Brisa and Kira, Zsolt},
  title     = {The Geometry of Robustness: Optimizing Loss Landscape Curvature and Feature Manifold Alignment for Robust Finetuning of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22133--22142},
}
PolarGuide-GSDR: 3D Gaussian Splatting Driven by Polarization Priors and Deferred Reflection for Real-World Reflective Scenes: Derui Shan,

Qian Qiao,

Hao Lu,

Tao Du,

Peng Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shan_2026_CVPR,
  author    = {Shan, Derui and Qiao, Qian and Lu, Hao and Du, Tao and Lu, Peng},
  title     = {{PolarGuide-GSDR}: {3D} {Gaussian} Splatting Driven by Polarization Priors and Deferred Reflection for Real-World Reflective Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26000--26009},
}
Parallelised Differentiable Straightest Geodesics for 3D Meshes: Hippolyte Verninas,

Caner Korkmaz,

Stefanos Zafeiriou,

Tolga Birdal,

Simone Foti; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Verninas_2026_CVPR,
  author    = {Verninas, Hippolyte and Korkmaz, Caner and Zafeiriou, Stefanos and Birdal, Tolga and Foti, Simone},
  title     = {Parallelised Differentiable Straightest Geodesics for {3D} Meshes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14637--14647},
}
PhysInOne: Visual Physics Learning and Reasoning in One Suite: Siyuan Zhou,

Hejun Wang,

Hu Cheng,

Jinxi Li,

Dongsheng Wang,

Junwei Jiang,

Yixiao Jin,

Jiayue Huang,

Shiwei Mao,

Shangjia Liu,

Yafei Yang,

Hongkang Song,

Shenxing Wei,

Zihui Zhang,

Bing Wang,

Zhihua Wang,

Chuhang Zou,

Bo Yang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zhou_2026_CVPR is shared by another entry in this listing; disambiguate before citing.
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Siyuan and Wang, Hejun and Cheng, Hu and Li, Jinxi and Wang, Dongsheng and Jiang, Junwei and Jin, Yixiao and Huang, Jiayue and Mao, Shiwei and Liu, Shangjia and Yang, Yafei and Song, Hongkang and Wei, Shenxing and Zhang, Zihui and Wang, Bing and Wang, Zhihua and Zou, Chuhang and Yang, Bo},
  title     = {{PhysInOne}: Visual Physics Learning and Reasoning in One Suite},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33131--33142},
}
Fusion of Depth and Semantics for Probabilistic Floorplan Localization: Kecheng Ye,

Mao Chen,

Xiangkai Zhang,

Xu Yang; [pdf]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Kecheng and Chen, Mao and Zhang, Xiangkai and Yang, Xu},
  title     = {Fusion of Depth and Semantics for Probabilistic Floorplan Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19413--19422},
}
FVBench: Benchmarking Deepfake Video Detection Capability of Large Multimodal Models: Jiarui Wang,

Huiyu Duan,

Juntong Wang,

Xiongkuo Min; [pdf]
[bibtex]
% NOTE(review): key Wang_2026_CVPR is shared by several entries in this listing; disambiguate before citing.
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jiarui and Duan, Huiyu and Wang, Juntong and Min, Xiongkuo},
  title     = {{FVBench}: Benchmarking Deepfake Video Detection Capability of Large Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4425--4437},
}
FFP-300K: Scaling First-Frame Propagation for Generalizable Video Editing: Xijie Huang,

Chengming Xu,

Donghao Luo,

Xiaobin Hu,

Peng Tang,

Xu Peng,

Jiangning Zhang,

Chengjie Wang,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Xijie and Xu, Chengming and Luo, Donghao and Hu, Xiaobin and Tang, Peng and Peng, Xu and Zhang, Jiangning and Wang, Chengjie and Fu, Yanwei},
  title     = {{FFP-300K}: Scaling First-Frame Propagation for Generalizable Video Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23172--23181},
}
OpenMarcie: Dataset for Multimodal Action Recognition in Industrial Environments: Hymalai Bello,

Lala Ray,

Joanna Sorysz,

Sungho Suh,

Paul Lukowicz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bello_2026_CVPR,
  author    = {Bello, Hymalai and Ray, Lala and Sorysz, Joanna and Suh, Sungho and Lukowicz, Paul},
  title     = {{OpenMarcie}: Dataset for Multimodal Action Recognition in Industrial Environments},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20129--20138},
}
MTA: Multimodal Task Alignment for BEV Perception and Captioning: Yunsheng Ma,

Burhaneddin Yaman,

Xin Ye,

Jingru Luo,

Feng Tao,

Abhirup Mallik,

Ziran Wang,

Liu Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Yunsheng and Yaman, Burhaneddin and Ye, Xin and Luo, Jingru and Tao, Feng and Mallik, Abhirup and Wang, Ziran and Ren, Liu},
  title     = {{MTA}: Multimodal Task Alignment for {BEV} Perception and Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {670--679},
}
GeoSAM2: Unleashing the Power of SAM2 for 3D Part Segmentation: Ken Deng,

Yunhan Yang,

Jingxiang Sun,

Xihui Liu,

Yebin Liu,

Ding Liang,

Yan-Pei Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Ken and Yang, Yunhan and Sun, Jingxiang and Liu, Xihui and Liu, Yebin and Liang, Ding and Cao, Yan-Pei},
  title     = {{GeoSAM2}: Unleashing the Power of {SAM2} for {3D} Part Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6367--6376},
}
Avatar Forcing: Real-Time Interactive Head Avatar Generation for Natural Conversation: Taekyung Ki,

Sangwon Jang,

Jaehyeong Jo,

Jaehong Yoon,

Sung Ju Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ki_2026_CVPR,
  author    = {Ki, Taekyung and Jang, Sangwon and Jo, Jaehyeong and Yoon, Jaehong and Hwang, Sung Ju},
  title     = {Avatar Forcing: Real-Time Interactive Head Avatar Generation for Natural Conversation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18074--18084},
}
Omni-Attack: Adversarial Attacks on Open-Ended VQA in Black-Box Multimodal LLMs: Kai Hu,

Weichen Yu,

Li Zhang,

Alexander Robey,

Andy Zou,

Haoqi Hu,

Chengming Xu,

Matt Fredrikson; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Kai and Yu, Weichen and Zhang, Li and Robey, Alexander and Zou, Andy and Hu, Haoqi and Xu, Chengming and Fredrikson, Matt},
  title     = {{Omni-Attack}: Adversarial Attacks on Open-Ended {VQA} in Black-Box Multimodal {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42341--42351},
}
Time Without Time: Pseudo-Temporal Representation for Space-Time Super-Resolution: Hee Min Choi,

Hyoa Kang,

Suji Kim,

Dokwan Oh,

Nam Ik Cho; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Hee Min and Kang, Hyoa and Kim, Suji and Oh, Dokwan and Cho, Nam Ik},
  title     = {Time Without Time: Pseudo-Temporal Representation for Space-Time Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6812--6822},
}
Unsafe2Safe: Controllable Image Anonymization for Downstream Utility: Minh Dinh,

SouYoung Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dinh_2026_CVPR,
  author    = {Dinh, Minh and Jin, SouYoung},
  title     = {{Unsafe2Safe}: Controllable Image Anonymization for Downstream Utility},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3326--3336},
}
No Way To Steal My Face: Proactive Defense Against Identity-Preserving Personalized Generation: Lizhi Xiong,

Jun Li,

Ziqiang Li,

Weiwei Jiang,

Zhangjie Fu; [pdf] [supp]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Lizhi and Li, Jun and Li, Ziqiang and Jiang, Weiwei and Fu, Zhangjie},
  title     = {No Way To Steal My Face: Proactive Defense Against Identity-Preserving Personalized Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20680--20690},
}
From Intuition to Investigation: A Tool-Augmented Reasoning MLLM Framework for Generalizable Face Anti-Spoofing: Haoyuan Zhang,

Keyao Wang,

Guosheng Zhang,

Haixiao Yue,

Zhiwen Tan,

Siran Peng,

Tianshuo Zhang,

Xiao Tan,

Kunbin Chen,

Wei He,

Jingdong Wang,

Ajian Liu,

Xiangyu Zhu,

Zhen Lei; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zhang_2026_CVPR is shared by several entries in this listing; disambiguate before citing.
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Haoyuan and Wang, Keyao and Zhang, Guosheng and Yue, Haixiao and Tan, Zhiwen and Peng, Siran and Zhang, Tianshuo and Tan, Xiao and Chen, Kunbin and He, Wei and Wang, Jingdong and Liu, Ajian and Zhu, Xiangyu and Lei, Zhen},
  title     = {From Intuition to Investigation: A Tool-Augmented Reasoning {MLLM} Framework for Generalizable Face Anti-Spoofing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40855--40865},
}
GaussianVision: Vision-Language Alignment from Compressed Image Representations using 2D Gaussian Splatting: Yasmine Omri,

Connor Ding,

Tsachy Weissman,

Thierry Tambe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Omri_2026_CVPR,
  author    = {Omri, Yasmine and Ding, Connor and Weissman, Tsachy and Tambe, Thierry},
  title     = {{GaussianVision}: Vision-Language Alignment from Compressed Image Representations using {2D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14926--14935},
}
Learning 3D Reconstruction with Priors in Test Time: Lei Zhou,

Haoyu Wu,

Akshat Dave,

Dimitris Samaras; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zhou_2026_CVPR is shared by another entry in this listing; disambiguate before citing.
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Lei and Wu, Haoyu and Dave, Akshat and Samaras, Dimitris},
  title     = {Learning {3D} Reconstruction with Priors in Test Time},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36550--36560},
}
Domain Sensitive Federated Learning with Fisher-Informed Pruning: Chenchen Lin,

Wenhao Yuan,

Zhengji Xu,

Xuehe Wang; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Chenchen and Yuan, Wenhao and Xu, Zhengji and Wang, Xuehe},
  title     = {Domain Sensitive Federated Learning with {Fisher}-Informed Pruning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17535--17544},
}
EgoXtreme: A Dataset for Robust Object Pose Estimation in Egocentric Views under Extreme Conditions: Taegyoon Yoon,

Yegyu Han,

Seojin Ji,

Jaewoo Park,

Sojeong Kim,

Taein Kwon,

Hyung-Sin Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoon_2026_CVPR,
  author    = {Yoon, Taegyoon and Han, Yegyu and Ji, Seojin and Park, Jaewoo and Kim, Sojeong and Kwon, Taein and Kim, Hyung-Sin},
  title     = {{EgoXtreme}: A Dataset for Robust Object Pose Estimation in Egocentric Views under Extreme Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40696--40706},
}
Hyper-PCN: Hypergraph-Based Point Cloud Completion via High-Order Correlation Modeling: Linfei Li,

Pei Tan,

Siqi Li,

Changqing Zou,

Yue Gao; [pdf] [supp]
[bibtex]
% NOTE(review): key Li_2026_CVPR is shared by several entries in this listing; disambiguate before citing.
@InProceedings{Li_2026_CVPR,
  author    = {Li, Linfei and Tan, Pei and Li, Siqi and Zou, Changqing and Gao, Yue},
  title     = {{Hyper-PCN}: Hypergraph-Based Point Cloud Completion via High-Order Correlation Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39121--39130},
}
PAVAS: Physics-Aware Video-to-Audio Synthesis: Oh Hyun-Bin,

Yuhta Takida,

Toshimitsu Uesaka,

Tae-Hyun Oh,

Yuki Mitsufuji; [pdf] [supp]
[bibtex]
% NOTE(review): the listing prints the first author as "Oh Hyun-Bin"; confirm whether "Oh" is the family name (the field would then read "Oh, Hyun-Bin").
@InProceedings{Hyun-Bin_2026_CVPR,
  author    = {Hyun-Bin, Oh and Takida, Yuhta and Uesaka, Toshimitsu and Oh, Tae-Hyun and Mitsufuji, Yuki},
  title     = {{PAVAS}: Physics-Aware Video-to-Audio Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14481--14491},
}
From 3D Pose to Prose: Biomechanics-Grounded Vision-Language Coaching: Yuyang Ji,

Yixuan Shen,

Shengjie Zhu,

Yu Kong,

Feng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2026_CVPR,
  author    = {Ji, Yuyang and Shen, Yixuan and Zhu, Shengjie and Kong, Yu and Liu, Feng},
  title     = {From {3D} Pose to Prose: Biomechanics-Grounded Vision-Language Coaching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23506--23515},
}
FISHuman: Fine-grained Single-image 3D Human Reconstruction via Multi-view 4D Remeshing: Hanxi Liu,

Yifang Men,

Zhouhui Lian; [pdf] [supp]
[bibtex]
% NOTE(review): key Liu_2026_CVPR is shared by several entries in this listing; disambiguate before citing.
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Hanxi and Men, Yifang and Lian, Zhouhui},
  title     = {{FISHuman}: Fine-grained Single-image {3D} Human Reconstruction via Multi-view {4D} Remeshing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42765--42776},
}
Exploring Adaptive Masked Reconstruction for Self-Supervised Skeleton-Based Action Recognition: Shengkai Sun,

Zhiyong Cheng,

Zefan Zhang,

Jianfeng Dong,

Zhihui Li,

Meng Wang; [pdf]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Shengkai and Cheng, Zhiyong and Zhang, Zefan and Dong, Jianfeng and Li, Zhihui and Wang, Meng},
  title     = {Exploring Adaptive Masked Reconstruction for Self-Supervised Skeleton-Based Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13974--13983},
}
Global Prior Meets Local Consistency: Dual-Memory Augmented Vision-Language-Action Model for Efficient Robotic Manipulation: Zaijing Li,

Bing Hu,

Rui Shao,

Gongwei Chen,

Dongmei Jiang,

Pengwei Xie,

Jianye Hao,

Liqiang Nie; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Li_2026_CVPR is shared by several entries in this listing; disambiguate before citing.
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zaijing and Hu, Bing and Shao, Rui and Chen, Gongwei and Jiang, Dongmei and Xie, Pengwei and Hao, Jianye and Nie, Liqiang},
  title     = {Global Prior Meets Local Consistency: Dual-Memory Augmented Vision-Language-Action Model for Efficient Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35135--35145},
}
VGGT-ohm: Jianyuan Wang,

Minghao Chen,

Shangzhan Zhang,

Nikita Karaev,

Johannes Schönberger,

Patrick Labatut,

Piotr Bojanowski,

David Novotny,

Andrea Vedaldi,

Christian Rupprecht; [pdf] [supp]
[bibtex]
% NOTE(review): key Wang_2026_CVPR is shared by several entries in this listing; disambiguate before citing.
% The umlaut is written as {\"o} so BibTeX treats it as a single special character when sorting and labelling.
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jianyuan and Chen, Minghao and Zhang, Shangzhan and Karaev, Nikita and Sch{\"o}nberger, Johannes and Labatut, Patrick and Bojanowski, Piotr and Novotny, David and Vedaldi, Andrea and Rupprecht, Christian},
  title     = {{VGGT-ohm}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21486--21499},
}
3D-IDE: 3D Implicit Depth Emergent: Chushan Zhang,

Ruihan Lu,

Jinguang Tong,

Yikai Wang,

Hongdong Li; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zhang_2026_CVPR is shared by several entries in this listing; disambiguate before citing.
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chushan and Lu, Ruihan and Tong, Jinguang and Wang, Yikai and Li, Hongdong},
  title     = {{3D-IDE}: {3D} Implicit Depth Emergent},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23837--23847},
}
CoRiM: Conflict-driven Risk Minimization for Dynamic Multimodal Fusion: Shihao Zou,

Wei Wei; [pdf] [supp]
[bibtex]
@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Shihao and Wei, Wei},
  title     = {{CoRiM}: Conflict-driven Risk Minimization for Dynamic Multimodal Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37821--37830},
}
Sculpt4D: Generating 4D Shapes via Sparse-Attention Diffusion Transformers: Minghao Yin,

Wenbo Hu,

Jiale Xu,

Ying Shan,

Kai Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Minghao and Hu, Wenbo and Xu, Jiale and Shan, Ying and Han, Kai},
  title     = {{Sculpt4D}: Generating {4D} Shapes via Sparse-Attention Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27188--27198},
}
StreamVLO: Streaming Visual-LiDAR Odometry with Cumulative Drift Compensation: Mengmeng Liu,

Jiuming Liu,

Michael Ying Yang,

Chaokang Jiang,

Jiangtao Li,

Yunpeng Zhang,

Hesheng Wang,

Francesco Nex,

Hao Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Mengmeng and Liu, Jiuming and Yang, Michael Ying and Jiang, Chaokang and Li, Jiangtao and Zhang, Yunpeng and Wang, Hesheng and Nex, Francesco and Cheng, Hao},
  title     = {{StreamVLO}: Streaming {Visual-LiDAR} Odometry with Cumulative Drift Compensation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39086--39097},
}
FedARA: Resource-adaptive Low-rank Personalized Federated Learning via Anchor-driven Representation Alignment on Heterogeneous Edge Devices: Ruonan Zhao,

Zheng Wang,

Debin Liu,

Shijie Lv,

Laurence Tianruo Yang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Ruonan and Wang, Zheng and Liu, Debin and Lv, Shijie and Yang, Laurence Tianruo},
  title     = {{FedARA}: Resource-adaptive Low-rank Personalized Federated Learning via Anchor-driven Representation Alignment on Heterogeneous Edge Devices},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10357--10366},
}
Seeing Through the Noise: Improving Infrared Small Target Detection and Segmentation from Noise Suppression Perspective: Maoxun Yuan,

Duanni Meng,

Ziteng Xi,

Tianyi Zhao,

Shiji Zhao,

Yimian Dai,

Xingxing Wei; [pdf] [arXiv]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Maoxun and Meng, Duanni and Xi, Ziteng and Zhao, Tianyi and Zhao, Shiji and Dai, Yimian and Wei, Xingxing},
  title     = {Seeing Through the Noise: Improving Infrared Small Target Detection and Segmentation from Noise Suppression Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27783--27792},
}
MV2UV: Generating High-quality UV Texture Maps with Multiview Prompts: Zheng Zhang,

Qinchuan Zhang,

Yuteng Ye,

Zhi Chen,

Penglei Ji,

Mengfei Li,

Wenxiao Zhang,

Yuan Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zheng and Zhang, Qinchuan and Ye, Yuteng and Chen, Zhi and Ji, Penglei and Li, Mengfei and Zhang, Wenxiao and Liu, Yuan},
  title     = {{MV2UV}: Generating High-quality {UV} Texture Maps with Multiview Prompts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12684--12694},
}
WhisperNet: A Scalable Solution for Bandwidth-Efficient Collaboration: Gong Chen,

Chaokun Zhang,

Xinyan Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Gong and Zhang, Chaokun and Zhao, Xinyan},
  title     = {{WhisperNet}: A Scalable Solution for Bandwidth-Efficient Collaboration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32154--32163},
}
One-to-All Animation: Alignment-Free Character Animation and Image Pose Transfer: Shijun Shi,

Jing Xu,

Zhihang Li,

Chunli Peng,

Xiaoda Yang,

Lijing Lu,

Kai Hu,

Jiangning Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Shijun and Xu, Jing and Li, Zhihang and Peng, Chunli and Yang, Xiaoda and Lu, Lijing and Hu, Kai and Zhang, Jiangning},
  title     = {One-to-All Animation: Alignment-Free Character Animation and Image Pose Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4011--4021},
}
SegCompass: Exploring Interpretable Alignment with Sparse Autoencoders for Enhanced Reasoning Segmentation: Zhenyu Lu,

Liupeng Li,

Jinpeng Wang,

Haoqian Kang,

Yan Feng,

Ke Chen,

Yaowei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Zhenyu and Li, Liupeng and Wang, Jinpeng and Kang, Haoqian and Feng, Yan and Chen, Ke and Wang, Yaowei},
  title     = {{SegCompass}: Exploring Interpretable Alignment with Sparse Autoencoders for Enhanced Reasoning Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19132--19142},
}
DeX-Portrait: Disentangled and Expressive Portrait Animation via Explicit and Latent Motion Representations: Yuxiang Shi,

Zhe Li,

Yanwen Wang,

Hao Zhu,

Xun Cao,

Ligang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Yuxiang and Li, Zhe and Wang, Yanwen and Zhu, Hao and Cao, Xun and Liu, Ligang},
  title     = {{DeX-Portrait}: Disentangled and Expressive Portrait Animation via Explicit and Latent Motion Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40009--40019},
}
AdapAction: Adaptive Target Action Backdoor Attack against GUI Agents: Baicheng Chen,

Mingda Zhang,

Min Zhang,

Haizhou Li,

Baoyuan Wu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Baicheng and Zhang, Mingda and Zhang, Min and Li, Haizhou and Wu, Baoyuan},
  title     = {{AdapAction}: Adaptive Target Action Backdoor Attack against {GUI} Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27894--27905},
}
MultiModalPFN: Extending Prior-Data Fitted Networks for Multimodal Tabular Learning: Wall Kim,

Chaeyoung Song,

Hanul Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Wall and Song, Chaeyoung and Kim, Hanul},
  title     = {{MultiModalPFN}: Extending Prior-Data Fitted Networks for Multimodal Tabular Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30357--30367},
}
VideoFusion: A Spatio-Temporal Collaborative Network for Multi-modal Video Fusion: Linfeng Tang,

Yeda Wang,

Meiqi Gong,

Zizhuo Li,

Yuxin Deng,

Xunpeng Yi,

Chunyu Li,

Han Xu,

Hao Zhang,

Jiayi Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Linfeng and Wang, Yeda and Gong, Meiqi and Li, Zizhuo and Deng, Yuxin and Yi, Xunpeng and Li, Chunyu and Xu, Han and Zhang, Hao and Ma, Jiayi},
  title     = {{VideoFusion}: A Spatio-Temporal Collaborative Network for Multi-modal Video Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19559--19569},
}
Multi-Hierarchical Contrastive Spectral Fusion for Multi-View Clustering: Bing Cai,

Xiaoli Wang,

Gui-Fu Lu,

Zechao Li; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Bing and Wang, Xiaoli and Lu, Gui-Fu and Li, Zechao},
  title     = {Multi-Hierarchical Contrastive Spectral Fusion for Multi-View Clustering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39617--39626},
}
Causal Motion Diffusion Models for Autoregressive Motion Generation: Qing Yu,

Akihisa Watanabe,

Kent Fujiwara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Qing and Watanabe, Akihisa and Fujiwara, Kent},
  title     = {Causal Motion Diffusion Models for Autoregressive Motion Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38366--38375},
}
GeoSemba: Reconstructing State Space Model for Cross Paradigm Representation in Medical Image Segmentation: Xutao Sun,

Jiarui Li,

Junwen Liu,

Yonggong Ren; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Xutao and Li, Jiarui and Liu, Junwen and Ren, Yonggong},
  title     = {{GeoSemba}: Reconstructing State Space Model for Cross Paradigm Representation in Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29990--29999},
}
Rethinking UMM Visual Generation: Masked Modeling for Efficient Image-Only Pre-training: Peng Sun,

Jun Xie,

Tao Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Peng and Xie, Jun and Lin, Tao},
  title     = {Rethinking {UMM} Visual Generation: Masked Modeling for Efficient Image-Only Pre-training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2047--2057},
}
GeCo-SRT: Geometry-aware Continual Adaptation for Cross-Task Sim-to-Real Transfer: Wenbo Yu,

Wenke Xia,

Weitao Zhang,

Di Hu; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Wenbo and Xia, Wenke and Zhang, Weitao and Hu, Di},
  title     = {{GeCo-SRT}: Geometry-aware Continual Adaptation for Cross-Task Sim-to-Real Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42408--42417},
}
Dynamics: Language-Based Representation for Inferring Rigid-Body Dynamics From Videos: Chia-Hsiang Kao,

Cong Phuoc Huynh,

Chien-Yi Wang,

Noranart Vesdapunt,

Stefan Stojanov,

Bharath Hariharan,

Oleksandr Obiednikov,

Ning Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Kao_2026_CVPR,
  author    = {Kao, Chia-Hsiang and Huynh, Cong Phuoc and Wang, Chien-Yi and Vesdapunt, Noranart and Stojanov, Stefan and Hariharan, Bharath and Obiednikov, Oleksandr and Zhou, Ning},
  title     = {Dynamics: Language-Based Representation for Inferring Rigid-Body Dynamics From Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42364--42374},
}
E$^2$-SCI: Elastic Edge-Cloud Speculative Decoding via Credit Inertia: Senyao Li,

Haozhao Wang,

Zhaobai Jiang,

Zhanbo Jin,

Hao Fan,

Ruixuan Li; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Senyao and Wang, Haozhao and Jiang, Zhaobai and Jin, Zhanbo and Fan, Hao and Li, Ruixuan},
  title     = {{E$^2$-SCI}: Elastic Edge-Cloud Speculative Decoding via Credit Inertia},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12944--12954},
}
EgoAVU: Egocentric Audio-Visual Understanding: Ashish Seth,

Xinhao Mei,

Changsheng Zhao,

Varun Nagaraja,

Ernie Chang,

Gregory P. Meyer,

Gael Le Lan,

Yunyang Xiong,

Vikas Chandra,

Yangyang Shi,

Dinesh Manocha,

Zhipeng Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seth_2026_CVPR,
  author    = {Seth, Ashish and Mei, Xinhao and Zhao, Changsheng and Nagaraja, Varun and Chang, Ernie and Meyer, Gregory P. and Le Lan, Gael and Xiong, Yunyang and Chandra, Vikas and Shi, Yangyang and Manocha, Dinesh and Cai, Zhipeng},
  title     = {{EgoAVU}: Egocentric Audio-Visual Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15805--15814},
}
CARD: Correlation Aware Restoration with Diffusion: Niki Nezakati,

Arnab Ghosh,

Amit Roy-Chowdhury,

Vishwanath Saragadam; [pdf] [supp]
[bibtex]
@InProceedings{Nezakati_2026_CVPR,
  author    = {Nezakati, Niki and Ghosh, Arnab and Roy-Chowdhury, Amit and Saragadam, Vishwanath},
  title     = {{CARD}: Correlation Aware Restoration with Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16531--16540},
}
3D Gaussian Splatting at Arbitrary Resolutions with Compact Proxy Anchors: Mingyun Jeong,

Seongro Yoon,

Francois Bremond,

Donghyeon Cho; [pdf] [supp]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Mingyun and Yoon, Seongro and Bremond, Francois and Cho, Donghyeon},
  title     = {{3D} {Gaussian} Splatting at Arbitrary Resolutions with Compact Proxy Anchors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18991--19000},
}
MMVIP: A Visible-infrared Paired Dataset for Multi-weather Marine Vision: Yunpeng Yin,

Lihan Wang,

Zhaoshen He,

Xinqiang He,

Xingming Liao,

Zhuowei Wang,

Lianglun Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Yunpeng and Wang, Lihan and He, Zhaoshen and He, Xinqiang and Liao, Xingming and Wang, Zhuowei and Cheng, Lianglun},
  title     = {{MMVIP}: A Visible-infrared Paired Dataset for Multi-weather Marine Vision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6432--6442},
}
Specificity-aware reinforcement learning for fine-grained open-world classification: Samuele Angheben,

Davide Berasi,

Alessandro Conti,

Elisa Ricci,

Yiming Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Angheben_2026_CVPR,
  author    = {Angheben, Samuele and Berasi, Davide and Conti, Alessandro and Ricci, Elisa and Wang, Yiming},
  title     = {Specificity-aware reinforcement learning for fine-grained open-world classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41467--41477},
}
Forensic-Friendly Image Manipulation via Controllable Latent Diffusion: Hanyu Chen,

Haiwei Wu,

Jinyu Tian,

Jianqing LI,

Jiantao Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Hanyu and Wu, Haiwei and Tian, Jinyu and Li, Jianqing and Zhou, Jiantao},
  title     = {Forensic-Friendly Image Manipulation via Controllable Latent Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35394--35404},
}
TopoHR: Hierarchical Centerline Representation for Cyclic Topology Reasoning in Driving Scenes with Point-to-Instance Relations: Yifeng Bai,

Zhirong Chen,

Bo Song,

Erkang Cheng,

Haibin Ling; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Yifeng and Chen, Zhirong and Song, Bo and Cheng, Erkang and Ling, Haibin},
  title     = {{TopoHR}: Hierarchical Centerline Representation for Cyclic Topology Reasoning in Driving Scenes with Point-to-Instance Relations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18161--18170},
}
Thinking with Programming Vision: Towards a Unified View for Thinking with Images: Zirun Guo,

Minjie Hong,

Feng Zhang,

Kai Jia,

Tao Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Zirun and Hong, Minjie and Zhang, Feng and Jia, Kai and Jin, Tao},
  title     = {Thinking with Programming Vision: Towards a Unified View for Thinking with Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33467--33476},
}
Language Does Matter for Cross-Domain Few-Shot Visual Feature Enhancement: Fei Zhou,

Xiwen Zhang,

Qingqing Qiu,

Lei Zhang,

Wei Wei,

Chen Ding,

Yi Zhang,

Liang Li,

Xiangyu Yue,

Yanning Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Fei and Zhang, Xiwen and Qiu, Qingqing and Zhang, Lei and Wei, Wei and Ding, Chen and Zhang, Yi and Li, Liang and Yue, Xiangyu and Zhang, Yanning},
  title     = {Language Does Matter for Cross-Domain Few-Shot Visual Feature Enhancement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7946--7956},
}
Pip-Stereo: Progressive Iterations Pruner for Iterative Optimization based Stereo Matching: Jintu Zheng,

Qizhe Liu,

Huangxin Xu,

Zhuojie Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Jintu and Liu, Qizhe and Xu, Huangxin and Chen, Zhuojie},
  title     = {{Pip-Stereo}: Progressive Iterations Pruner for Iterative Optimization based Stereo Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7503--7512},
}
Fast Reasoning Segmentation for Images and Videos: Yiqing Shen,

Mathias Unberath; [pdf] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Yiqing and Unberath, Mathias},
  title     = {Fast Reasoning Segmentation for Images and Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34765--34774},
}
Blink: Dynamic Visual Token Resolution for Enhanced Multimodal Understanding: Yuchen Feng,

Zhenyu Zhang,

Naibin Gu,

Yilong Chen,

Peng Fu,

Zheng Lin,

Shuohuan Wang,

Yu Sun,

Hua Wu,

Weiping Wang,

Haifeng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Yuchen and Zhang, Zhenyu and Gu, Naibin and Chen, Yilong and Fu, Peng and Lin, Zheng and Wang, Shuohuan and Sun, Yu and Wu, Hua and Wang, Weiping and Wang, Haifeng},
  title     = {Blink: Dynamic Visual Token Resolution for Enhanced Multimodal Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3511--3521},
}
Depth Any Panoramas: A Foundation Model for Panoramic Depth Estimation: Xin Lin,

Meixi Song,

Dizhe Zhang,

Wenxuan Lu,

Haodong Li,

Bo Du,

Ming-Hsuan Yang,

Truong Nguyen,

Lu Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Xin and Song, Meixi and Zhang, Dizhe and Lu, Wenxuan and Li, Haodong and Du, Bo and Yang, Ming-Hsuan and Nguyen, Truong and Qi, Lu},
  title     = {Depth Any Panoramas: A Foundation Model for Panoramic Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26835--26844},
}
Towards Fine-Grained Attribution: Instance-Aware Preference Optimization for Aligning Diffusion Models: Jiayang Sun,

Pin Wang,

Hongbo Wang,

Xinyue Liu,

Huaibo Huang,

Ran He; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Jiayang and Wang, Pin and Wang, Hongbo and Liu, Xinyue and Huang, Huaibo and He, Ran},
  title     = {Towards Fine-Grained Attribution: Instance-Aware Preference Optimization for Aligning Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43155--43164},
}
Identity-Preserving Image-to-Video Generation via Reward-Guided Optimization: Liao Shen,

Wentao Jiang,

Yiran Zhu,

Jiahe Li,

Tiezheng Ge,

Zhiguo Cao,

Bo Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Liao and Jiang, Wentao and Zhu, Yiran and Li, Jiahe and Ge, Tiezheng and Cao, Zhiguo and Zheng, Bo},
  title     = {Identity-Preserving Image-to-Video Generation via Reward-Guided Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27281--27290},
}
MoRGS: Efficient Per-Gaussian Motion Reasoning for Streamable Dynamic 3D Scenes: Wonjoon Lee,

Sungmin Woo,

Donghyeong Kim,

Jungho Lee,

Sangheon Park,

Sangyoun Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Wonjoon and Woo, Sungmin and Kim, Donghyeong and Lee, Jungho and Park, Sangheon and Lee, Sangyoun},
  title     = {{MoRGS}: Efficient {Per-Gaussian} Motion Reasoning for Streamable Dynamic {3D} Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41044--41053},
}
CREward: A Type-Specific Creativity Reward Model: Jiyeon Han,

Ali Mahdavi-Amiri,

Hao Zhang,

Haedong Jeong; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Jiyeon and Mahdavi-Amiri, Ali and Zhang, Hao and Jeong, Haedong},
  title     = {{CREward}: A Type-Specific Creativity Reward Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21932--21941},
}
TimeRipples: Accelerating vDiTs by Understanding the Spatio-Temporal Correlations in Latent Space: Wenxuan Mao,

Yulin Sun,

Aiyue Chen,

Jing Lin,

Yiwu Yao,

Yiming Gan,

Jieru Zhao,

Jingwen Leng,

Minyi Guo,

Yu Feng; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Wenxuan and Sun, Yulin and Chen, Aiyue and Lin, Jing and Yao, Yiwu and Gan, Yiming and Zhao, Jieru and Leng, Jingwen and Guo, Minyi and Feng, Yu},
  title     = {{TimeRipples}: Accelerating {vDiTs} by Understanding the Spatio-Temporal Correlations in Latent Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25688--25698},
}
PromptMoE: A Segmentation Refinement Framework Leveraging Mixture of Experts for Improved Prompting: Stephen Price,

Danielle L. Cote,

Elke A. Rundensteiner; [pdf] [supp]
[bibtex]
@InProceedings{Price_2026_CVPR,
  author    = {Price, Stephen and Cote, Danielle L. and Rundensteiner, Elke A.},
  title     = {{PromptMoE}: A Segmentation Refinement Framework Leveraging Mixture of Experts for Improved Prompting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6325--6335},
}
Towards Reasoning-Preserving Unlearning in Multimodal Large Language Models: Hongji Li,

Manjiang Yu,

Junchi Yao,

Priyanka Singh,

Xue Li,

Di Wang,

Lijie Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Hongji and Yu, Manjiang and Yao, Junchi and Singh, Priyanka and Li, Xue and Wang, Di and Hu, Lijie},
  title     = {Towards Reasoning-Preserving Unlearning in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10251--10261},
}
GUI-CEval: A Hierarchical and Comprehensive Chinese Benchmark for Mobile GUI Agents: Yang Li,

Yuchen Liu,

Haoyu Lu,

Zhiqiang Xia,

Hongzhen Wang,

Kaiyang Han,

Changpeng Yang,

Jinyang Wu,

Jiaming Xu,

Runyu Shi,

Ying Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yang and Liu, Yuchen and Lu, Haoyu and Xia, Zhiqiang and Wang, Hongzhen and Han, Kaiyang and Yang, Changpeng and Wu, Jinyang and Xu, Jiaming and Shi, Runyu and Huang, Ying},
  title     = {{GUI-CEval}: A Hierarchical and Comprehensive {Chinese} Benchmark for Mobile {GUI} Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20303--20312},
}
TAR: Token-Aware Refinement for Fine-grained Generalized Category Discovery: Xingyu Yang,

Yu Zhang,

Siya Mi,

Xiu-Shen Wei; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xingyu and Zhang, Yu and Mi, Siya and Wei, Xiu-Shen},
  title     = {{TAR}: Token-Aware Refinement for Fine-grained Generalized Category Discovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31995--32004},
}
SpaceTimePilot: Generative Rendering of Dynamic Scenes Across Space and Time: Zhening Huang,

Hyeonho Jeong,

Xuelin Chen,

Yulia Gryaditskaya,

Tuanfeng Y. Wang,

Joan Lasenby,

Chun-Hao Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zhening and Jeong, Hyeonho and Chen, Xuelin and Gryaditskaya, Yulia and Wang, Tuanfeng Y. and Lasenby, Joan and Huang, Chun-Hao},
  title     = {{SpaceTimePilot}: Generative Rendering of Dynamic Scenes Across Space and Time},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11218--11228},
}
TAS-LoRA: Transformer Architecture Search with Mixture-of-LoRA Experts: Jeimin Jeon,

Hyunju Lee,

Bumsub Ham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeon_2026_CVPR,
  author    = {Jeon, Jeimin and Lee, Hyunju and Ham, Bumsub},
  title     = {{TAS-LoRA}: Transformer Architecture Search with {Mixture-of-LoRA} Experts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20243--20252},
}
Towards Cross-Modal Preservation, Consistency and Alignment for Privacy-Preserving Visible-Infrared Person Re-Identification: Yudi Xie,

Zhongao Zhou,

Bin Yang,

Zhenghan Chen,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yudi and Zhou, Zhongao and Yang, Bin and Chen, Zhenghan and Ye, Mang},
  title     = {Towards Cross-Modal Preservation, Consistency and Alignment for Privacy-Preserving Visible-Infrared Person Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11273--11282},
}
Geometrically-Constrained Agent for Spatial Reasoning: Zeren Chen,

Xiaoya Lu,

Zhijie Zheng,

Pengrui Li,

Lehan He,

Yijin Zhou,

Jing Shao,

Bohan Zhuang,

Lu Sheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zeren and Lu, Xiaoya and Zheng, Zhijie and Li, Pengrui and He, Lehan and Zhou, Yijin and Shao, Jing and Zhuang, Bohan and Sheng, Lu},
  title     = {Geometrically-Constrained Agent for Spatial Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38689--38699},
}
Beyond Reassembly: Fractured Object Recovery with Missing Parts: Qun-Ce Xu,

Jiahui Li,

Yan-Pei Cao,

Weihao Cheng,

Tai-Jiang Mu,

Ying Shan,

Chuan Li,

Da Chen,

Yong-Liang Yang,

Shi-min Hu; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Qun-Ce and Li, Jiahui and Cao, Yan-Pei and Cheng, Weihao and Mu, Tai-Jiang and Shan, Ying and Li, Chuan and Chen, Da and Yang, Yong-Liang and Hu, Shi-min},
  title     = {Beyond Reassembly: Fractured Object Recovery with Missing Parts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20488--20498},
}
Learning Hierarchical Hyperbolic Mixture Model for Part-aware 3D Generation: Qitong Yang,

Mingtao Feng,

Zijie Wu,

Huixin Zhu,

Weisheng Dong,

Yaonan Wang,

Ajmal Mian; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Qitong and Feng, Mingtao and Wu, Zijie and Zhu, Huixin and Dong, Weisheng and Wang, Yaonan and Mian, Ajmal},
  title     = {Learning Hierarchical Hyperbolic Mixture Model for Part-aware {3D} Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12695--12705},
}
Frame2Freq: Spectral Adapters for Fine-Grained Video Understanding: Thinesh Thiyakesan Ponbagavathi,

Constantin Seibold,

Alina Roitberg; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ponbagavathi_2026_CVPR,
  author    = {Ponbagavathi, Thinesh Thiyakesan and Seibold, Constantin and Roitberg, Alina},
  title     = {{Frame2Freq}: Spectral Adapters for Fine-Grained Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24073--24083},
}
More Than Meets the Eye: A Unified Image Fusion Framework via Semantic-Pixel Entropy Trade-off for Zero-Shot Generalization: Xiaowen Liu,

Jing Li,

Hongtao Huo,

Haozhe Cao,

Renhua Wang,

Xu Dong; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xiaowen and Li, Jing and Huo, Hongtao and Cao, Haozhe and Wang, Renhua and Dong, Xu},
  title     = {More Than Meets the Eye: A Unified Image Fusion Framework via Semantic-Pixel Entropy Trade-off for Zero-Shot Generalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41510--41520},
}
From Measurement to Mitigation: Quantifying and Reducing Identity Leakage in Image Representation Encoders with Linear Subspace Removal: Daniel George,

Charles Yeh,

Daniel Lee,

Yifei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{George_2026_CVPR,
  author    = {George, Daniel and Yeh, Charles and Lee, Daniel and Zhang, Yifei},
  title     = {From Measurement to Mitigation: Quantifying and Reducing Identity Leakage in Image Representation Encoders with Linear Subspace Removal},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3284--3293},
}
CaricHarmony: Contrastive Diffusion Paths for Identity-Preserving Caricature Synthesis: Dongyu Wang,

Dar-Yen Chen,

Yi-Zhe Song; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Dongyu and Chen, Dar-Yen and Song, Yi-Zhe},
  title     = {{CaricHarmony}: Contrastive Diffusion Paths for Identity-Preserving Caricature Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36126--36135},
}
PhyCritic: Multimodal Critic Models for Physical AI: Tianyi Xiong,

Shihao Wang,

Guilin Liu,

Yi Dong,

Ming Li,

Heng Huang,

Jan Kautz,

Zhiding Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Tianyi and Wang, Shihao and Liu, Guilin and Dong, Yi and Li, Ming and Huang, Heng and Kautz, Jan and Yu, Zhiding},
  title     = {{PhyCritic}: Multimodal Critic Models for Physical {AI}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36881--36892},
}
UI-Lens: Assessing General MLLMs' Potential to Automate UI Display Quality Assurance: Wei Xiang,

Yexinrui Wu,

Xinli Chen,

Xinran Li,

Shi Chen; [pdf] [supp]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Wei and Wu, Yexinrui and Chen, Xinli and Li, Xinran and Chen, Shi},
  title     = {{UI-Lens}: Assessing General {MLLMs}' Potential to Automate {UI} Display Quality Assurance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25882--25892},
}
SegEarth-R2: Towards Comprehensive Language-guided Segmentation for Remote Sensing Images: Zepeng Xin,

Kaiyu Li,

Luodi Chen,

Wanchen Li,

Xiao Yuchen,

Hui Qiao,

Weizhan Zhang,

Deyu Meng,

Xiangyong Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xin_2026_CVPR,
  author    = {Xin, Zepeng and Li, Kaiyu and Chen, Luodi and Li, Wanchen and Yuchen, Xiao and Qiao, Hui and Zhang, Weizhan and Meng, Deyu and Cao, Xiangyong},
  title     = {{SegEarth-R2}: Towards Comprehensive Language-guided Segmentation for Remote Sensing Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13199--13210},
}
Scaling Parallel Sequence Models to Vision Foundation Models: Yitong Jiang,

Collin McCarthy,

Hongjun Wang,

Hanrong Ye,

Qi Dou,

Tianfan Xue,

Jinwei Gu,

Jan Kautz,

Hongxu Yin,

Pavlo Molchanov,

Sifei Liu; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
    author    = {Jiang, Yitong and McCarthy, Collin and Wang, Hongjun and Ye, Hanrong and Dou, Qi and Xue, Tianfan and Gu, Jinwei and Kautz, Jan and Yin, Hongxu and Molchanov, Pavlo and Liu, Sifei},
    title     = {Scaling Parallel Sequence Models to Vision Foundation Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41332--41341}
}
DA-VAE: Plug-in Latent Compression for Diffusion via Detail Alignment: Xin Cai,

Zhiyuan You,

Zhoutong Zhang,

Tianfan Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
    author    = {Cai, Xin and You, Zhiyuan and Zhang, Zhoutong and Xue, Tianfan},
    title     = {{DA-VAE}: Plug-in Latent Compression for Diffusion via Detail Alignment},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18703--18713}
}
AvatarPointillist: AutoRegressive 4D Gaussian Avatarization: Hongyu Liu,

Xuan Wang,

Zijian Wu,

Yating Wang,

Ziyu Wan,

Yue Ma,

Runtao Liu,

Boyao Zhou,

Yujun Shen,

Qifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Hongyu and Wang, Xuan and Wu, Zijian and Wang, Yating and Wan, Ziyu and Ma, Yue and Liu, Runtao and Zhou, Boyao and Shen, Yujun and Chen, Qifeng},
    title     = {{AvatarPointillist}: {AutoRegressive} {4D} {Gaussian} Avatarization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11039--11050}
}
IPR-1: Interactive Physical Reasoner: Mingyu Zhang,

Lifeng Zhuo,

Tianxi Tan,

Guocan Xie,

Xian Nie,

Yan Li,

Renjie Zhao,

Zizhu He,

Ziyu Wang,

Jiting Cai,

Yong-Lu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Mingyu and Zhuo, Lifeng and Tan, Tianxi and Xie, Guocan and Nie, Xian and Li, Yan and Zhao, Renjie and He, Zizhu and Wang, Ziyu and Cai, Jiting and Li, Yong-Lu},
    title     = {{IPR-1}: Interactive Physical Reasoner},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33415--33425}
}
Out of Sight, Out of Track: Adversarial Attacks on Propagation-based Multi-Object Trackers via Query State Manipulation: Halima Bouzidi,

Haoyu Liu,

Yonatan Achamyeleh,

Praneetsai Iddamsetty,

Mohammad Al Faruque; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bouzidi_2026_CVPR,
    author    = {Bouzidi, Halima and Liu, Haoyu and Achamyeleh, Yonatan and Iddamsetty, Praneetsai and Al Faruque, Mohammad},
    title     = {Out of Sight, Out of Track: Adversarial Attacks on Propagation-based Multi-Object Trackers via Query State Manipulation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13326--13335}
}
Making Training-Free Diffusion Segmentors Scale with the Generative Power: Benyuan Meng,

Qianqian Xu,

Zitai Wang,

Xiaochun Cao,

Longtao Huang,

Qingming Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meng_2026_CVPR,
    author    = {Meng, Benyuan and Xu, Qianqian and Wang, Zitai and Cao, Xiaochun and Huang, Longtao and Huang, Qingming},
    title     = {Making Training-Free Diffusion Segmentors Scale with the Generative Power},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35861--35871}
}
InternVideo-Next: Towards World-Understanding Video Models: Chenting Wang,

Yuhan Zhu,

Yicheng Xu,

Jiange Yang,

Ziang Yan,

Yali Wang,

Yi Wang,

Limin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Chenting and Zhu, Yuhan and Xu, Yicheng and Yang, Jiange and Yan, Ziang and Wang, Yali and Wang, Yi and Wang, Limin},
    title     = {{InternVideo-Next}: Towards World-Understanding Video Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16966--16976}
}
VMD-FACT: A New Video Dataset and MLLM-based method for Detecting Realistic AI-Generated Video Misinformation: Yongkang Zhang,

Dongyu She,

Baiyu Ji,

Qichuan Geng,

Zhong Zhou,

Yan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Yongkang and She, Dongyu and Ji, Baiyu and Geng, Qichuan and Zhou, Zhong and Wang, Yan},
    title     = {{VMD-FACT}: A New Video Dataset and {MLLM}-based method for Detecting Realistic {AI}-Generated Video Misinformation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21284--21294}
}
GS-CLIP: Zero-shot 3D Anomaly Detection by Geometry-Aware Prompt and Synergistic View Representation Learning: Zehao Deng,

An Liu,

Yan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
    author    = {Deng, Zehao and Liu, An and Wang, Yan},
    title     = {{GS-CLIP}: Zero-shot {3D} Anomaly Detection by Geometry-Aware Prompt and Synergistic View Representation Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35587--35596}
}
VLIC: Vision-Language Models As Perceptual Judges for Human-Aligned Image Compression: Kyle Sargent,

Ruiqi Gao,

Philipp Henzler,

Charles Herrmann,

Aleksander Holynski,

Li Fei-Fei,

Jiajun Wu,

Jason Y. Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sargent_2026_CVPR,
    author    = {Sargent, Kyle and Gao, Ruiqi and Henzler, Philipp and Herrmann, Charles and Holynski, Aleksander and Fei-Fei, Li and Wu, Jiajun and Zhang, Jason Y.},
    title     = {{VLIC}: Vision-Language Models As Perceptual Judges for Human-Aligned Image Compression},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {10462--10471}
}
ReMatch: Boosting Representation through Matching for Multimodal Retrieval: Qianying Liu,

Xiao Liang,

Zhiqiang Zhang,

Yibo Chen,

Xu Tang,

Zhongfei Qing,

Fengfan Zhou,

Yao Hu,

Paul Henderson; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Qianying and Liang, Xiao and Zhang, Zhiqiang and Chen, Yibo and Tang, Xu and Qing, Zhongfei and Zhou, Fengfan and Hu, Yao and Henderson, Paul},
    title     = {{ReMatch}: Boosting Representation through Matching for Multimodal Retrieval},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16823--16833}
}
VGG-T$^3$: Offline Feed-Forward 3D Reconstruction at Scale: Sven Elflein,

Ruilong Li,

Sérgio Agostinho,

Zan Gojcic,

Laura Leal-Taixé,

Qunjie Zhou,

Aljosa Osep; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Elflein_2026_CVPR,
    author    = {Elflein, Sven and Li, Ruilong and Agostinho, S{\'e}rgio and Gojcic, Zan and Leal-Taix{\'e}, Laura and Zhou, Qunjie and Osep, Aljosa},
    title     = {{VGG-T$^3$}: Offline Feed-Forward {3D} Reconstruction at Scale},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36464--36474}
}
SketchRevive: Fine-Grained Pixel-to-Vector Sketch Completion with Diffusion-Prior-Guided Multimodal LLMs: Ran Zuo,

Haoxiang Hu,

Chenxi Pei,

Yanxuan Liu,

Wenwen Qiang,

Fang Liu,

Xiaoming Deng,

Cuixia Ma,

Yong-Jin Liu; [pdf]
[bibtex]
@InProceedings{Zuo_2026_CVPR,
    author    = {Zuo, Ran and Hu, Haoxiang and Pei, Chenxi and Liu, Yanxuan and Qiang, Wenwen and Liu, Fang and Deng, Xiaoming and Ma, Cuixia and Liu, Yong-Jin},
    title     = {{SketchRevive}: Fine-Grained Pixel-to-Vector Sketch Completion with Diffusion-Prior-Guided Multimodal {LLMs}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {43165--43174}
}
Same Attention, Different Truths: Put Logit-Lens over Visual Attention to Detect and Mitigate LVLM Object Hallucination: Zichuan Wang,

Songlin Yang,

Bo Peng,

Zhenchen Tang,

Yang Li,

Beibei Dong,

Jing Dong; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Zichuan and Yang, Songlin and Peng, Bo and Tang, Zhenchen and Li, Yang and Dong, Beibei and Dong, Jing},
    title     = {Same Attention, Different Truths: Put Logit-Lens over Visual Attention to Detect and Mitigate {LVLM} Object Hallucination},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25315--25325}
}
Emergent Extreme-View Geometry in 3D Foundation Models: Yiwen Zhang,

Joseph Tung,

Ruojin Cai,

David Fouhey,

Hadar Averbuch-Elor; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Yiwen and Tung, Joseph and Cai, Ruojin and Fouhey, David and Averbuch-Elor, Hadar},
    title     = {Emergent Extreme-View Geometry in {3D} Foundation Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36411--36421}
}
HiconAgent: History Context-aware Policy Optimization for GUI Agents: Xurui Zhou,

Gongwei Chen,

Yuquan Xie,

Zaijing Li,

Kaiwen Zhou,

Shuai Wang,

Shuo Yang,

Zhuotao Tian,

Rui Shao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Xurui and Chen, Gongwei and Xie, Yuquan and Li, Zaijing and Zhou, Kaiwen and Wang, Shuai and Yang, Shuo and Tian, Zhuotao and Shao, Rui},
    title     = {{HiconAgent}: History Context-aware Policy Optimization for {GUI} Agents},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13028--13038}
}
CLaD: Planning with Grounded Foresight via Cross-Modal Latent Dynamics: Andrew Jeong,

Jaemin Kim,

Sebin Lee,

Sung-Eui Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
    author    = {Jeong, Andrew and Kim, Jaemin and Lee, Sebin and Yoon, Sung-Eui},
    title     = {{CLaD}: Planning with Grounded Foresight via Cross-Modal Latent Dynamics},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {966--975}
}
Rethinking BCE Loss for Multi-Label Image Recognition with Fine-Tuning: Ao Zhou,

Zhiwei Jiang,

Zifeng Cheng,

Cong Wang,

Yafeng Yin,

Shufan Yang,

Qing Gu; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Ao and Jiang, Zhiwei and Cheng, Zifeng and Wang, Cong and Yin, Yafeng and Yang, Shufan and Gu, Qing},
    title     = {Rethinking {BCE} Loss for Multi-Label Image Recognition with Fine-Tuning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38784--38793}
}
SE(3)-Equivariance with Geometric and Topological Guidance for Category-Level Object Pose Estimation: Sheng Yu,

Di-Hua Zhai,

Yuanqing Xia; [pdf]
[bibtex]
@InProceedings{Yu_2026_CVPR,
    author    = {Yu, Sheng and Zhai, Di-Hua and Xia, Yuanqing},
    title     = {{SE(3)}-Equivariance with Geometric and Topological Guidance for Category-Level Object Pose Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35114--35123}
}
Unblur-SLAM: Dense Neural SLAM for Blurry Inputs: Qi Zhang,

Denis Rozumny,

Francesco Girlanda,

Sezer Karaoglu,

Marc Pollefeys,

Theo Gevers,

Martin R. Oswald; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Qi and Rozumny, Denis and Girlanda, Francesco and Karaoglu, Sezer and Pollefeys, Marc and Gevers, Theo and Oswald, Martin R.},
    title     = {{Unblur-SLAM}: Dense Neural {SLAM} for Blurry Inputs},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {352--362}
}
InstructMix2Mix: Consistent Sparse-View Editing Through Multi-View Model Personalization: Daniel Gilo,

Or Litany; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gilo_2026_CVPR,
    author    = {Gilo, Daniel and Litany, Or},
    title     = {{InstructMix2Mix}: Consistent Sparse-View Editing Through Multi-View Model Personalization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29000--29011}
}
GDPO-SR: Group Direct Preference Optimization for One-Step Generative Image Super-Resolution: Qiaosi Yi,

Shuai Li,

Rongyuan Wu,

Lingchen Sun,

Zhengqiang Zhang,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yi_2026_CVPR,
    author    = {Yi, Qiaosi and Li, Shuai and Wu, Rongyuan and Sun, Lingchen and Zhang, Zhengqiang and Zhang, Lei},
    title     = {{GDPO-SR}: Group Direct Preference Optimization for One-Step Generative Image Super-Resolution},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {2177--2187}
}
SEA: Evaluating Sketch Abstraction Efficiency via Element-level Commonsense Visual Question Answering: Jiho Park,

Sieun Choi,

Jaeyoon Seo,

Minho Sohn,

Yeana Kim,

Jihie Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
    author    = {Park, Jiho and Choi, Sieun and Seo, Jaeyoon and Sohn, Minho and Kim, Yeana and Kim, Jihie},
    title     = {{SEA}: Evaluating Sketch Abstraction Efficiency via Element-level Commonsense Visual Question Answering},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31652--31661}
}
POCA: Pareto-Optimal Curriculum Alignment for Visual Text Generation: Yaohou Fan,

Qingzhong Wang,

Yongsong Huang,

Junyi Liu,

Tomo Miyazaki,

Shinichiro Omachi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
    author    = {Fan, Yaohou and Wang, Qingzhong and Huang, Yongsong and Liu, Junyi and Miyazaki, Tomo and Omachi, Shinichiro},
    title     = {{POCA}: {Pareto}-Optimal Curriculum Alignment for Visual Text Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21891--21900}
}
SceneScribe-1M: A Large-Scale Video Dataset with Comprehensive Geometric and Semantic Annotations: Yunnan Wang,

Kecheng Zheng,

Jianyuan Wang,

Minghao Chen,

David Novotny,

Christian Rupprecht,

Yinghao Xu,

Xing Zhu,

Wenjun Zeng,

Xin Jin,

Yujun Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yunnan and Zheng, Kecheng and Wang, Jianyuan and Chen, Minghao and Novotny, David and Rupprecht, Christian and Xu, Yinghao and Zhu, Xing and Zeng, Wenjun and Jin, Xin and Shen, Yujun},
    title     = {{SceneScribe-1M}: A Large-Scale Video Dataset with Comprehensive Geometric and Semantic Annotations},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {12628--12639}
}
AdaSpark: Adaptive Sparsity for Efficient Long-Video Understanding: Handong Li,

Zikang Liu,

Longteng Guo,

Tongtian Yue,

Yepeng Tang,

Xinxin Zhu,

Chuanyang Zheng,

Ziming Wang,

Zhibin Wang,

Jun Song,

Cheng Yu,

Bo Zheng,

Jing Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Handong and Liu, Zikang and Guo, Longteng and Yue, Tongtian and Tang, Yepeng and Zhu, Xinxin and Zheng, Chuanyang and Wang, Ziming and Wang, Zhibin and Song, Jun and Yu, Cheng and Zheng, Bo and Liu, Jing},
    title     = {{AdaSpark}: Adaptive Sparsity for Efficient Long-Video Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40548--40558}
}
Solvability of the Viewing Graph Under the Affine Camera Model: Gabriele Pedroni,

Rakshith Madhavan,

Federica Arrigoni; [pdf] [supp]
[bibtex]
@InProceedings{Pedroni_2026_CVPR,
    author    = {Pedroni, Gabriele and Madhavan, Rakshith and Arrigoni, Federica},
    title     = {Solvability of the Viewing Graph Under the Affine Camera Model},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26731--26740}
}
Seeing Beyond 8bits: Subjective and Objective Quality Assessment of HDR-UGC Videos: Shreshth Saini,

Bowen Chen,

Yilin Wang,

Neil Birkbeck,

Balu Adsumilli,

Alan C. Bovik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saini_2026_CVPR,
    author    = {Saini, Shreshth and Chen, Bowen and Wang, Yilin and Birkbeck, Neil and Adsumilli, Balu and Bovik, Alan C.},
    title     = {Seeing Beyond 8bits: Subjective and Objective Quality Assessment of {HDR-UGC} Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15538--15549}
}
Direction-aware 3D Large Multimodal Models: Quan Liu,

Weihao Xuan,

Junjue Wang,

Naoto Yokoya,

Ling Shao,

Shijian Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Quan and Xuan, Weihao and Wang, Junjue and Yokoya, Naoto and Shao, Ling and Lu, Shijian},
    title     = {Direction-aware {3D} Large Multimodal Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {9668--9678}
}
DreamingComics: A Story Visualization Pipeline via Subject and Layout Customized Generation using Video Models: Patrick Kwon,

Chen Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kwon_2026_CVPR,
    author    = {Kwon, Patrick and Chen, Chen},
    title     = {{DreamingComics}: A Story Visualization Pipeline via Subject and Layout Customized Generation using Video Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36769--36780}
}
DeAR: Fine-Grained VLM Adaptation by Decomposing Attention Head Roles: Yiming Ma,

Hongkun Yang,

Lionel Z. Wang,

Bin Chen,

Weizhi Xian,

Jianzhi Teng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
    author    = {Ma, Yiming and Yang, Hongkun and Wang, Lionel Z. and Chen, Bin and Xian, Weizhi and Teng, Jianzhi},
    title     = {{DeAR}: Fine-Grained {VLM} Adaptation by Decomposing Attention Head Roles},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31514--31523}
}
Thinking in Uncertainty: Mitigating Hallucinations in MLRMs with Latent Entropy-Aware Decoding: Zhongxing Xu,

Zhonghua Wang,

Zhe Qian,

Dachuan Shi,

Feilong Tang,

Ming Hu,

Shiyan Su,

Xiaocheng Zou,

Wei Feng,

Dwarikanath Mahapatra,

Yifan Peng,

Minquan Lin,

Zongyuan Ge; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Zhongxing and Wang, Zhonghua and Qian, Zhe and Shi, Dachuan and Tang, Feilong and Hu, Ming and Su, Shiyan and Zou, Xiaocheng and Feng, Wei and Mahapatra, Dwarikanath and Peng, Yifan and Lin, Minquan and Ge, Zongyuan},
    title     = {Thinking in Uncertainty: Mitigating Hallucinations in {MLRMs} with Latent Entropy-Aware Decoding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11064--11075}
}
Hierarchical Action Learning for Weakly-Supervised Action Segmentation: Junxian Huang,

Ruichu Cai,

Juntao Fang,

Hao Zhu,

Boyan Xu,

Weilin Chen,

Zijian Li,

Shenghua Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Junxian and Cai, Ruichu and Fang, Juntao and Zhu, Hao and Xu, Boyan and Chen, Weilin and Li, Zijian and Gao, Shenghua},
    title     = {Hierarchical Action Learning for Weakly-Supervised Action Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {6054--6064}
}
Seeing Both Sides: Towards Bidirectional Semantic Alignment for Open-Vocabulary Camouflaged Object Segmentation: Guohui Zhang,

Fuming Sun,

Yu Zhao,

Yuqiu Kong,

Jing Sun,

Fasheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Guohui and Sun, Fuming and Zhao, Yu and Kong, Yuqiu and Sun, Jing and Wang, Fasheng},
    title     = {Seeing Both Sides: Towards Bidirectional Semantic Alignment for Open-Vocabulary Camouflaged Object Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {27655--27664}
}
ELITE: Efficient Gaussian Head Avatar from a Monocular Video via Learned Initialization and Test-time Generative Adaptation: Kim Youwang,

Lee Hyoseok,

Park Subin,

Gerard Pons-Moll,

Tae-Hyun Oh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Youwang_2026_CVPR,
    author    = {Youwang, Kim and Hyoseok, Lee and Subin, Park and Pons-Moll, Gerard and Oh, Tae-Hyun},
    title     = {{ELITE}: Efficient {Gaussian} Head Avatar from a Monocular Video via Learned Initialization and Test-time Generative Adaptation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40152--40162}
}
ParaUni: Enhance Generation in Unified Multimodal Model with Reinforcement-driven Hierarchical Parallel Information Interaction: Jiangtong Tan,

Lin Liu,

Jie Huang,

Xiaopeng Zhang,

Qi Tian,

Feng Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2026_CVPR,
    author    = {Tan, Jiangtong and Liu, Lin and Huang, Jie and Zhang, Xiaopeng and Tian, Qi and Zhao, Feng},
    title     = {{ParaUni}: Enhance Generation in Unified Multimodal Model with Reinforcement-driven Hierarchical Parallel Information Interaction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {41836--41846}
}
MuCo: Multi-turn Contrastive Learning for Multimodal Embedding Model: Geonmo Gu,

Byeongho Heo,

Jaemyung Yu,

Jaehui Hwang,

Taekyung Kim,

Sangmin Lee,

HeeJae Jun,

Yoohoon Kang,

Sangdoo Yun,

Dongyoon Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR,
    author    = {Gu, Geonmo and Heo, Byeongho and Yu, Jaemyung and Hwang, Jaehui and Kim, Taekyung and Lee, Sangmin and Jun, HeeJae and Kang, Yoohoon and Yun, Sangdoo and Han, Dongyoon},
    title     = {{MuCo}: Multi-turn Contrastive Learning for Multimodal Embedding Model},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1749--1758}
}
Multi-view Crowd Tracking Transformer with View-Ground Interactions Under Large Real-World Scenes: Qi Zhang,

Jixuan Chen,

Kaiyi Zhang,

Xinquan Yu,

Antoni B. Chan,

Hui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Qi and Chen, Jixuan and Zhang, Kaiyi and Yu, Xinquan and Chan, Antoni B. and Huang, Hui},
    title     = {Multi-view Crowd Tracking Transformer with View-Ground Interactions Under Large Real-World Scenes},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13626--13635}
}
Geo2: Geometry-Guided Cross-view Geo-Localization and Image Synthesis: Yancheng Zhang,

Xiaohan Zhang,

Guangyu Sun,

Zonglin Lyu,

Safwan Wshah,

Chen Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Yancheng and Zhang, Xiaohan and Sun, Guangyu and Lyu, Zonglin and Wshah, Safwan and Chen, Chen},
    title     = {{Geo2}: Geometry-Guided Cross-view Geo-Localization and Image Synthesis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19432--19442}
}
Instruction-Guided Lesion Segmentation for Chest X-rays with Automatically Generated Large-Scale Dataset: Geon Choi,

Hangyul Yoon,

Hyunju Shin,

Hyunki Park,

Sang Hoon Seo,

Eunho Yang,

Edward Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
    author    = {Choi, Geon and Yoon, Hangyul and Shin, Hyunju and Park, Hyunki and Seo, Sang Hoon and Yang, Eunho and Choi, Edward},
    title     = {Instruction-Guided Lesion Segmentation for Chest {X-rays} with Automatically Generated Large-Scale Dataset},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1482--1492}
}
Efficiency Follows Global-Local Decoupling: Zhenyu Yang,

Gensheng Pei,

Tao Chen,

Yichao Zhou,

Tianfei Zhou,

Yazhou Yao,

Fumin Shen; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Zhenyu and Pei, Gensheng and Chen, Tao and Zhou, Yichao and Zhou, Tianfei and Yao, Yazhou and Shen, Fumin},
    title     = {Efficiency Follows Global-Local Decoupling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25524--25535}
}
Decoding 3D Perception via BrainSSD: Synergistic Fusion of EEG Representations from Static and Dynamic Visual Streams: Yincheng Yao,

Enze Shi,

Shu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Yao_2026_CVPR,
    author    = {Yao, Yincheng and Shi, Enze and Zhang, Shu},
    title     = {Decoding {3D} Perception via {BrainSSD}: Synergistic Fusion of {EEG} Representations from Static and Dynamic Visual Streams},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42711--42721}
}
Lifting Unlabeled Internet-level Data for 3D Scene Understanding: Yixin Chen,

Yaowei Zhang,

Huangyue Yu,

Junchao He,

Yan Wang,

Jiangyong Huang,

Hongyu Shen,

Junfeng Ni,

Shaofei Wang,

Baoxiong Jia,

Song-Chun Zhu,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Yixin and Zhang, Yaowei and Yu, Huangyue and He, Junchao and Wang, Yan and Huang, Jiangyong and Shen, Hongyu and Ni, Junfeng and Wang, Shaofei and Jia, Baoxiong and Zhu, Song-Chun and Huang, Siyuan},
    title     = {Lifting Unlabeled {Internet}-level Data for {3D} Scene Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {5814--5827}
}
AudioAvatar: Personalized Audio-driven Whole-body Talking Avatars: Seungeun Lee,

SeungJun Moon,

Hah Min Lew,

Ji-Su Kang,

Gyeong-Moon Park; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, Seungeun and Moon, SeungJun and Lew, Hah Min and Kang, Ji-Su and Park, Gyeong-Moon},
    title     = {{AudioAvatar}: Personalized Audio-driven Whole-body Talking Avatars},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {3998--4010}
}
GUIDE: A Benchmark for Understanding and Assisting Users in Open-Ended GUI Tasks: Saelyne Yang,

Jaesang Yu,

Yi-Hao Peng,

Kevin Qinghong Lin,

Jae Won Cho,

Yale Song,

Juho Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Saelyne and Yu, Jaesang and Peng, Yi-Hao and Lin, Kevin Qinghong and Cho, Jae Won and Song, Yale and Kim, Juho},
    title     = {{GUIDE}: A Benchmark for Understanding and Assisting Users in Open-Ended {GUI} Tasks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13017--13027}
}
Ov3R: Open-Vocabulary Semantic 3D Reconstruction from RGB Videos: Ziren Gong,

Xiaohan Li,

Fabio Tosi,

Jiawei Han,

Stefano Mattoccia,

Jianfei Cai,

Matteo Poggi; [pdf] [arXiv]
[bibtex]
@InProceedings{Gong_2026_CVPR,
    author    = {Gong, Ziren and Li, Xiaohan and Tosi, Fabio and Han, Jiawei and Mattoccia, Stefano and Cai, Jianfei and Poggi, Matteo},
    title     = {{Ov3R}: Open-Vocabulary Semantic {3D} Reconstruction from {RGB} Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34206--34216}
}
GeoRelight: Learning Joint Geometrical Relighting and Reconstruction with Flexible Multi-Modal Diffusion Transformers: Yuxuan Xue,

Ruofan Liang,

Egor Zakharov,

Timur Bagautdinov,

Chen Cao,

Giljoo Nam,

Shunsuke Saito,

Gerard Pons-Moll,

Javier Romero; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR,
    author    = {Xue, Yuxuan and Liang, Ruofan and Zakharov, Egor and Bagautdinov, Timur and Cao, Chen and Nam, Giljoo and Saito, Shunsuke and Pons-Moll, Gerard and Romero, Javier},
    title     = {{GeoRelight}: Learning Joint Geometrical Relighting and Reconstruction with Flexible Multi-Modal Diffusion Transformers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29771--29780}
}
Yume1.5: A Text-Controlled Interactive World Generation Model: Xiaofeng Mao,

Zhen Li,

Chuanhao Li,

Xiaojie Xu,

Kaining Ying,

Kaipeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2026_CVPR,
    author    = {Mao, Xiaofeng and Li, Zhen and Li, Chuanhao and Xu, Xiaojie and Ying, Kaining and Zhang, Kaipeng},
    title     = {{Yume1.5}: A Text-Controlled Interactive World Generation Model},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {7752--7761}
}
UST-Hand: An Uncertainty-aware Spatiotemporal Point Cloud Interaction Network for 3D Self-supervised Hand Pose Estimation: Tianhao Han,

Haoyang Zhang,

Liang Xie,

Haochen Chang,

Kun Gao,

Yuan Cheng,

Pengfei Ren,

Erwei Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
    author    = {Han, Tianhao and Zhang, Haoyang and Xie, Liang and Chang, Haochen and Gao, Kun and Cheng, Yuan and Ren, Pengfei and Yin, Erwei},
    title     = {{UST-Hand}: An Uncertainty-aware Spatiotemporal Point Cloud Interaction Network for {3D} Self-supervised Hand Pose Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {8857--8867}
}
MotionCrafter: Dense Geometry and Motion Reconstruction with a 4D VAE: Ruijie Zhu,

Jiahao Lu,

Wenbo Hu,

Xiaoguang Han,

Jianfei Cai,

Ying Shan,

Chuanxia Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Ruijie and Lu, Jiahao and Hu, Wenbo and Han, Xiaoguang and Cai, Jianfei and Shan, Ying and Zheng, Chuanxia},
    title     = {{MotionCrafter}: Dense Geometry and Motion Reconstruction with a {4D} {VAE}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40303--40315}
}
cryoSENSE: Compressive Sensing Enables High-throughput Microscopy with Sparse and Generative Priors on the Protein Cryo-EM Image Manifold: Zain Shabeeb,

Daniel Saeedi,

Darin Tsui,

Vida Jamali,

Amirali Aghazadeh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shabeeb_2026_CVPR,
    author    = {Shabeeb, Zain and Saeedi, Daniel and Tsui, Darin and Jamali, Vida and Aghazadeh, Amirali},
    title     = {{cryoSENSE}: Compressive Sensing Enables High-throughput Microscopy with Sparse and Generative Priors on the Protein {Cryo-EM} Image Manifold},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34072--34083}
}
SMRABooth: Subject and Motion Representation Alignment for Customized Video Generation: Xuancheng Xu,

Yaning Li,

Sisi You,

Bing-Kun Bao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Xuancheng and Li, Yaning and You, Sisi and Bao, Bing-Kun},
    title     = {{SMRABooth}: Subject and Motion Representation Alignment for Customized Video Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16130--16141}
}
4DEquine: Disentangling Motion and Appearance for 4D Equine Reconstruction from Monocular Video: Jin Lyu,

Liang An,

Pujin Cheng,

Yebin Liu,

Xiaoying Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lyu_2026_CVPR,
  author    = {Lyu, Jin and An, Liang and Cheng, Pujin and Liu, Yebin and Tang, Xiaoying},
  title     = {{4DEquine}: Disentangling Motion and Appearance for {4D} Equine Reconstruction from Monocular Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32496--32506}
}
PowerCLIP: Powerset Alignment for Contrastive Pre-Training: Masaki Kawamura,

Nakamasa Inoue,

Rintaro Yanagi,

Hirokatsu Kataoka,

Rio Yokota; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kawamura_2026_CVPR,
  author    = {Kawamura, Masaki and Inoue, Nakamasa and Yanagi, Rintaro and Kataoka, Hirokatsu and Yokota, Rio},
  title     = {{PowerCLIP}: Powerset Alignment for Contrastive Pre-Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22111--22122}
}
Monet: Reasoning in Latent Visual Space Beyond Image and Language: Qixun Wang,

Yang Shi,

Yifei Wang,

Yuanxing Zhang,

Pengfei Wan,

Kun Gai,

Xianghua Ying,

Yisen Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Qixun and Shi, Yang and Wang, Yifei and Zhang, Yuanxing and Wan, Pengfei and Gai, Kun and Ying, Xianghua and Wang, Yisen},
  title     = {Monet: Reasoning in Latent Visual Space Beyond Image and Language},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12030--12040}
}
GLINT: Modeling Scene-Scale Transparency via Gaussian Radiance Transport: Youngju Na,

Jaeseong Yun,

Soohyun Ryu,

Hyunsu Kim,

Sung-Eui Yoon,

Suyong Yeon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Na_2026_CVPR,
  author    = {Na, Youngju and Yun, Jaeseong and Ryu, Soohyun and Kim, Hyunsu and Yoon, Sung-Eui and Yeon, Suyong},
  title     = {{GLINT}: Modeling Scene-Scale Transparency via {Gaussian} Radiance Transport},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7256--7265}
}
CoCoVideo: The High-Quality Commercial-Model-Based Contrastive Benchmark for AI-Generated Video Detection: Huidong Feng,

Wentao Chen,

Jie Chen,

Xinqi Cai,

Ruolong Ma,

Yinglin Zheng,

Yuxin Lin,

Ming Zeng; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Huidong and Chen, Wentao and Chen, Jie and Cai, Xinqi and Ma, Ruolong and Zheng, Yinglin and Lin, Yuxin and Zeng, Ming},
  title     = {{CoCoVideo}: The High-Quality Commercial-Model-Based Contrastive Benchmark for {AI-Generated} Video Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11346--11356}
}
Expanding mmWave Datasets for Human Pose Estimation with Unlabeled Data and LiDAR Datasets: Zhuoxuan Peng,

Boan Zhu,

Xingjian Zhang,

Wenying Li,

S.-H. Gary Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Zhuoxuan and Zhu, Boan and Zhang, Xingjian and Li, Wenying and Chan, S.-H. Gary},
  title     = {Expanding {mmWave} Datasets for Human Pose Estimation with Unlabeled Data and {LiDAR} Datasets},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21221--21230}
}
Spectral Super-Resolution via Adversarial Unfolding and Data-Driven Spectrum Regularization: From Multispectral Satellite Data to NASA Hyperspectral Image: Si-Sheng Young,

Chia-Hsiang Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Young_2026_CVPR,
  author    = {Young, Si-Sheng and Lin, Chia-Hsiang},
  title     = {Spectral Super-Resolution via Adversarial Unfolding and Data-Driven Spectrum Regularization: From Multispectral Satellite Data to {NASA} Hyperspectral Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27827--27837}
}
Copy-Transform-Paste: Zero-Shot Object-Object Alignment Guided by Vision-Language and Geometric Constraints: Rotem Gatenyo,

Ohad Fried; [pdf] [supp]
[bibtex]
@InProceedings{Gatenyo_2026_CVPR,
  author    = {Gatenyo, Rotem and Fried, Ohad},
  title     = {Copy-Transform-Paste: Zero-Shot Object-Object Alignment Guided by Vision-Language and Geometric Constraints},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14936--14945}
}
When to Think and When to Look: Uncertainty-Guided Lookback: Jing Bi,

Filippos Bellos,

Junjia Guo,

Yayuan Li,

Chao Huang,

Yunlong Tang,

Luchuan Song,

Susan Liang,

Zhongfei Zhang,

Jason J. Corso,

Chenliang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bi_2026_CVPR,
  author    = {Bi, Jing and Bellos, Filippos and Guo, Junjia and Li, Yayuan and Huang, Chao and Tang, Yunlong and Song, Luchuan and Liang, Susan and Zhang, Zhongfei and Corso, Jason J. and Xu, Chenliang},
  title     = {When to Think and When to Look: Uncertainty-Guided Lookback},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5104--5113}
}
Generative Adversarial Perturbations with Cross-paradigm Transferability on Localized Crowd Counting: Alabi Mehzabin Anisha,

Guangjing Wang,

Sriram Chellappan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Anisha_2026_CVPR,
  author    = {Anisha, Alabi Mehzabin and Wang, Guangjing and Chellappan, Sriram},
  title     = {Generative Adversarial Perturbations with Cross-paradigm Transferability on Localized Crowd Counting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20629--20638}
}
Multi-Metric Representation Learning Strategy Based on Clustering for Fine-Grained Multimodal Sentiment Analysis: Yidan Wang,

Zongheng Wang,

Hongjie Xing,

Chunguo Li,

Xiaoxiao Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yidan and Wang, Zongheng and Xing, Hongjie and Li, Chunguo and Liu, Xiaoxiao},
  title     = {Multi-Metric Representation Learning Strategy Based on Clustering for Fine-Grained Multimodal Sentiment Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37864--37873}
}
MangoBench: A Benchmark for Multi-Agent Goal-Conditioned Offline Reinforcement Learning: Yi Wang,

Ningze Zhong,

Zhiheng Fu,

Longguang Wang,

Ye Zhang,

Yulan Guo; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yi and Zhong, Ningze and Fu, Zhiheng and Wang, Longguang and Zhang, Ye and Guo, Yulan},
  title     = {{MangoBench}: A Benchmark for Multi-Agent Goal-Conditioned Offline Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6219--6228}
}
VectorArk: Learning Practical Image Vectorization with Rounded Polygon Representation: Tarun Gehlaut,

Difan Liu,

Charu Bansal,

Krutik Malani,

Souymodip Chakraborty,

Ankit Phogat,

Matthew Fisher,

Vineet Batra; [pdf] [supp]
[bibtex]
@InProceedings{Gehlaut_2026_CVPR,
  author    = {Gehlaut, Tarun and Liu, Difan and Bansal, Charu and Malani, Krutik and Chakraborty, Souymodip and Phogat, Ankit and Fisher, Matthew and Batra, Vineet},
  title     = {{VectorArk}: Learning Practical Image Vectorization with Rounded Polygon Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31619--31627}
}
ProxyFL: A Proxy-Guided Framework for Federated Semi-Supervised Learning: Duowen Chen,

Yan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Duowen and Wang, Yan},
  title     = {{ProxyFL}: A Proxy-Guided Framework for Federated Semi-Supervised Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17525--17534}
}
FedRG: Unleashing the Representation Geometry for Federated Learning with Noisy Clients: Tian Wen,

Zhiqin Yang,

Yonggang Zhang,

Xuefeng Jiang,

Hao Peng,

Yuwei Wang,

Bo Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Tian and Yang, Zhiqin and Zhang, Yonggang and Jiang, Xuefeng and Peng, Hao and Wang, Yuwei and Han, Bo},
  title     = {{FedRG}: Unleashing the Representation Geometry for Federated Learning with Noisy Clients},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24545--24556}
}
MUFASA: A Multi-Layer Framework for Slot Attention: Sebastian Bock,

Leonie Schüßler,

Krishnakant Singh,

Simone Schaub-Meyer,

Stefan Roth; [pdf] [supp]
[bibtex]
@InProceedings{Bock_2026_CVPR,
  author    = {Bock, Sebastian and Sch{\"u}{\ss}ler, Leonie and Singh, Krishnakant and Schaub-Meyer, Simone and Roth, Stefan},
  title     = {{MUFASA}: A Multi-Layer Framework for Slot Attention},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27750--27760}
}
RAMEN: Resolution-Adjustable Multimodal Encoder for Earth Observation: Nicolas Houdré,

Diego Marcos,

Hugo Riffaud de Turckheim,

Dino Ienco,

Laurent Wendling,

Camille Kurtz,

Sylvain Lobry; [pdf] [supp]
[bibtex]
@InProceedings{Houdre_2026_CVPR,
  author    = {Houdr{\'e}, Nicolas and Marcos, Diego and de Turckheim, Hugo Riffaud and Ienco, Dino and Wendling, Laurent and Kurtz, Camille and Lobry, Sylvain},
  title     = {{RAMEN}: Resolution-Adjustable Multimodal Encoder for {Earth} Observation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27838--27848}
}
Designing to Forget: Deep Semi-parametric Models for Unlearning: Amber Yijia Zheng,

Yu-Shan Tai,

Raymond A. Yeh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Amber Yijia and Tai, Yu-Shan and Yeh, Raymond A.},
  title     = {Designing to Forget: Deep Semi-parametric Models for Unlearning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17409--17419}
}
DriveCombo: Benchmarking Compositional Traffic Rule Reasoning in Autonomous Driving: Enhui Ma,

Jiahuan Zhang,

Guantian Zheng,

Tao Tang,

Shengbo Eben Li,

Yuhang Lu,

Xia Zhou,

Xueyang Zhang,

Yifei Zhan,

Kun Zhan,

Zhihui Hao,

Xianpeng Lang,

Kaicheng Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Enhui and Zhang, Jiahuan and Zheng, Guantian and Tang, Tao and Li, Shengbo Eben and Lu, Yuhang and Zhou, Xia and Zhang, Xueyang and Zhan, Yifei and Zhan, Kun and Hao, Zhihui and Lang, Xianpeng and Yu, Kaicheng},
  title     = {{DriveCombo}: Benchmarking Compositional Traffic Rule Reasoning in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32113--32123}
}
SAVE: Speech-Aware Video Representation Learning for Video-Text Retrieval: Ruixiang Zhao,

Zhihao Xu,

Bangxiang Lan,

Zijie Xin,

Jingyu Liu,

Xirong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Ruixiang and Xu, Zhihao and Lan, Bangxiang and Xin, Zijie and Liu, Jingyu and Li, Xirong},
  title     = {{SAVE}: Speech-Aware Video Representation Learning for Video-Text Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31166--31175}
}
Forecast the Principal, Stabilize the Residual: Subspace-Aware Feature Caching for Diffusion Transformers: Guantao Chen,

Shikang Zheng,

Yuqi Lin,

Linfeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Guantao and Zheng, Shikang and Lin, Yuqi and Zhang, Linfeng},
  title     = {Forecast the Principal, Stabilize the Residual: Subspace-Aware Feature Caching for Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23632--23641}
}
ViLoMem: Agentic Learner with Grow-and-Refine Multimodal Semantic Memory: Weihao Bo,

Shan Zhang,

Yanpeng Sun,

Jingjing Wu,

Qunyi Xie,

Xiao Tan,

Kunbin Chen,

Wei He,

Xiaofan Li,

Na Zhao,

Jingdong Wang,

Zechao Li; [pdf] [supp]
[bibtex]
@InProceedings{Bo_2026_CVPR,
  author    = {Bo, Weihao and Zhang, Shan and Sun, Yanpeng and Wu, Jingjing and Xie, Qunyi and Tan, Xiao and Chen, Kunbin and He, Wei and Li, Xiaofan and Zhao, Na and Wang, Jingdong and Li, Zechao},
  title     = {{ViLoMem}: Agentic Learner with Grow-and-Refine Multimodal Semantic Memory},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5476--5486}
}
Time Blindness: Why Video-Language Models Can't See What Humans Can?: Ujjwal Upadhyay,

Mukul Ranjan,

Zhiqiang Shen,

Mohamed Elhoseiny; [pdf] [supp]
[bibtex]
@InProceedings{Upadhyay_2026_CVPR,
  author    = {Upadhyay, Ujjwal and Ranjan, Mukul and Shen, Zhiqiang and Elhoseiny, Mohamed},
  title     = {Time Blindness: Why Video-Language Models Can't See What Humans Can?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30906--30918}
}
VENI: Variational Encoder for Natural Illumination: Paul Walker,

James A. D. Gardner,

Andreea Ardelean,

William A. P. Smith,

Bernhard Egger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Walker_2026_CVPR,
  author    = {Walker, Paul and Gardner, James A. D. and Ardelean, Andreea and Smith, William A. P. and Egger, Bernhard},
  title     = {{VENI}: Variational Encoder for Natural Illumination},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16248--16257}
}
Learnability-Driven Submodular Optimization for Active Roadside 3D Detection: Ruiyu Mao,

Baoming Zhang,

Nicholas Ruozzi,

Yunhui Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Ruiyu and Zhang, Baoming and Ruozzi, Nicholas and Guo, Yunhui},
  title     = {Learnability-Driven Submodular Optimization for Active Roadside {3D} Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11579--11588}
}
Stepwise Credit Assignment for GRPO on Flow-Matching Models: Yash Savani,

Branislav Kveton,

Yuchen Liu,

Yilin Wang,

Jing Shi,

Subhojyoti Mukherjee,

Nikos Vlassis,

Krishna Kumar Singh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Savani_2026_CVPR,
  author    = {Savani, Yash and Kveton, Branislav and Liu, Yuchen and Wang, Yilin and Shi, Jing and Mukherjee, Subhojyoti and Vlassis, Nikos and Singh, Krishna Kumar},
  title     = {Stepwise Credit Assignment for {GRPO} on Flow-Matching Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42007--42017}
}
MVGGT: Multimodal Visual Geometry Grounded Transformer for Multiview 3D Referring Expression Segmentation: Changli Wu,

Haodong Wang,

Jiayi Ji,

Yutian Yao,

Chunsai Du,

Jihua Kang,

Yanwei Fu,

Liujuan Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Changli and Wang, Haodong and Ji, Jiayi and Yao, Yutian and Du, Chunsai and Kang, Jihua and Fu, Yanwei and Cao, Liujuan},
  title     = {{MVGGT}: Multimodal Visual Geometry Grounded Transformer for Multiview {3D} Referring Expression Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16800--16810}
}
Next-Scale Autoregressive Models for Text-to-Motion Generation: Zhiwei Zheng,

Shibo Jin,

Lingjie Liu,

Mingmin Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Zhiwei and Jin, Shibo and Liu, Lingjie and Zhao, Mingmin},
  title     = {Next-Scale Autoregressive Models for Text-to-Motion Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16376--16386}
}
Tea-Adapter: Teacher Adapter for Efficient Conditional Generation: Yinhan Zhang,

Yue Ma,

Fangqiu Yi,

Chenyang Qi,

Chi Zhang,

Kunyu Feng,

Zeyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yinhan and Ma, Yue and Yi, Fangqiu and Qi, Chenyang and Zhang, Chi and Feng, Kunyu and Wang, Zeyu},
  title     = {{Tea-Adapter}: Teacher Adapter for Efficient Conditional Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4805--4815}
}
Enhancing Visual Representation with Textual Semantics: Textual Semantics-Powered Prototypes for Heterogeneous Federated Learning: Xinghao Wu,

Jianwei Niu,

Xuefeng Liu,

Guogang Zhu,

Jiayuan Zhang,

Shaojie Tang,

Wei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xinghao and Niu, Jianwei and Liu, Xuefeng and Zhu, Guogang and Zhang, Jiayuan and Tang, Shaojie and Chen, Wei},
  title     = {Enhancing Visual Representation with Textual Semantics: Textual Semantics-Powered Prototypes for Heterogeneous Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10313--10323}
}
PointWorld: Scaling 3D World Models for In-The-Wild Robotic Manipulation: Wenlong Huang,

Yu-Wei Chao,

Arsalan Mousavian,

Ming-Yu Liu,

Dieter Fox,

Kaichun Mo,

Li Fei-Fei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Wenlong and Chao, Yu-Wei and Mousavian, Arsalan and Liu, Ming-Yu and Fox, Dieter and Mo, Kaichun and Fei-Fei, Li},
  title     = {{PointWorld}: Scaling {3D} World Models for In-The-Wild Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20765--20779}
}
BAgger: Backwards Aggregation for Mitigating Drift in Autoregressive Video Diffusion Models: Ryan Po,

Eric Ryan Chan,

Changan Chen,

Gordon Wetzstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Po_2026_CVPR,
  author    = {Po, Ryan and Chan, Eric Ryan and Chen, Changan and Wetzstein, Gordon},
  title     = {{BAgger}: Backwards Aggregation for Mitigating Drift in Autoregressive Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43727--43739}
}
TAG-MoE: Task-Aware Gating for Unified Generative Mixture-of-Experts: Yu Xu,

Hongbin Yan,

Juan Cao,

Yiji Cheng,

Tiankai Hang,

Runze He,

Zijin Yin,

Shiyi Zhang,

Yuxin Zhang,

Jintao Li,

Chunyu Wang,

Qinglin Lu,

Tong-Yee Lee,

Fan Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yu and Yan, Hongbin and Cao, Juan and Cheng, Yiji and Hang, Tiankai and He, Runze and Yin, Zijin and Zhang, Shiyi and Zhang, Yuxin and Li, Jintao and Wang, Chunyu and Lu, Qinglin and Lee, Tong-Yee and Tang, Fan},
  title     = {{TAG-MoE}: Task-Aware Gating for Unified Generative Mixture-of-Experts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27270--27280}
}
Image-to-Point Cloud Feature Back-Projection for Multimodal Training of 3D Semantic Segmentation: Jiawei Han,

Matteo Poggi,

Li Huan,

Changshuo Wang,

Kaiqi Liu,

Wei Li; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Jiawei and Poggi, Matteo and Huan, Li and Wang, Changshuo and Liu, Kaiqi and Li, Wei},
  title     = {Image-to-Point Cloud Feature Back-Projection for Multimodal Training of {3D} Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42049--42060}
}
MDCS-MoAME: Multi-directional Composite Scanning with Mixture of Attention and Mamba Experts for Cancer Survival Prediction: Linjie Qu,

Jin Xiao,

Xiangrong Liu,

Changming Sun,

Hui Cui,

Yuqi Fang,

Ran Su,

Qiangguo Jin,

Leyi Wei; [pdf]
[bibtex]
@InProceedings{Qu_2026_CVPR,
  author    = {Qu, Linjie and Xiao, Jin and Liu, Xiangrong and Sun, Changming and Cui, Hui and Fang, Yuqi and Su, Ran and Jin, Qiangguo and Wei, Leyi},
  title     = {{MDCS-MoAME}: Multi-directional Composite Scanning with Mixture of Attention and {Mamba} Experts for Cancer Survival Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14461--14470}
}
GeoGuide: Hierarchical Geometric Guidance for Open-Vocabulary 3D Semantic Segmentation: Xujing Tao,

Chuxin Wang,

Yubo Ai,

Zhixin Cheng,

Zhuoyuan Li,

Liangsheng Liu,

Yujia Chen,

Xinjun Li,

Qiao Li,

Wenfei Yang,

Tianzhu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2026_CVPR,
  author    = {Tao, Xujing and Wang, Chuxin and Ai, Yubo and Cheng, Zhixin and Li, Zhuoyuan and Liu, Liangsheng and Chen, Yujia and Li, Xinjun and Li, Qiao and Yang, Wenfei and Zhang, Tianzhu},
  title     = {{GeoGuide}: Hierarchical Geometric Guidance for Open-Vocabulary {3D} Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26855--26866}
}
Memory Matters: Boosting Training-Free Zero-Shot Temporal Action Localization with a Learnable Lookup Table: Han Jiang,

Haoyu Tang,

Xiaoxuan Mu,

Chen Li,

Jihua Zhu; [pdf]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Han and Tang, Haoyu and Mu, Xiaoxuan and Li, Chen and Zhu, Jihua},
  title     = {Memory Matters: Boosting Training-Free Zero-Shot Temporal Action Localization with a Learnable Lookup Table},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9762--9772}
}
TRCoRSurg: Temporal-Relational Co-Reasoning for Surgical Video Triplet Recognition: Fang Li,

Shihao Zou,

Weixin Si,

Yang Gao,

Shuai Li,

Aimin Hao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Fang and Zou, Shihao and Si, Weixin and Gao, Yang and Li, Shuai and Hao, Aimin},
  title     = {{TRCoRSurg}: Temporal-Relational Co-Reasoning for Surgical Video Triplet Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2811--2820}
}
Volumetric Functional Maps: Filippo Maggioli,

Simone Melzi,

Marco Livesu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Maggioli_2026_CVPR,
  author    = {Maggioli, Filippo and Melzi, Simone and Livesu, Marco},
  title     = {Volumetric Functional Maps},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20443--20454}
}
Focus-to-Perceive Representation Learning: A Cognition-Inspired Hierarchical Framework for Endoscopic Video Analysis: Yuan Zhang,

Sihao Dou,

Kai Hu,

Shuhua Deng,

Chunhong Cao,

Fen Xiao,

Xieping Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yuan and Dou, Sihao and Hu, Kai and Deng, Shuhua and Cao, Chunhong and Xiao, Fen and Gao, Xieping},
  title     = {Focus-to-Perceive Representation Learning: A Cognition-Inspired Hierarchical Framework for Endoscopic Video Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28264--28274}
}
Improved Mean Flows: On the Challenges of Fastforward Generative Models: Zhengyang Geng,

Yiyang Lu,

Zongze Wu,

Eli Shechtman,

J. Zico Kolter,

Kaiming He; [pdf] [arXiv]
[bibtex]
@InProceedings{Geng_2026_CVPR,
  author    = {Geng, Zhengyang and Lu, Yiyang and Wu, Zongze and Shechtman, Eli and Kolter, J. Zico and He, Kaiming},
  title     = {Improved Mean Flows: On the Challenges of Fastforward Generative Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30467--30476}
}
Don't Show Pixels, Show Cues: Unlocking Visual Tool Reasoning in Language Models via Perception Programs: Muhammad Kamran Janjua,

Hugo Silva,

Di Niu,

Bahador Rashidi; [pdf] [supp]
[bibtex]
@InProceedings{Janjua_2026_CVPR,
  author    = {Janjua, Muhammad Kamran and Silva, Hugo and Niu, Di and Rashidi, Bahador},
  title     = {Don't Show Pixels, Show Cues: Unlocking Visual Tool Reasoning in Language Models via Perception Programs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5165--5174}
}
Prime Once, then Reprogram Locally: An Efficient Alternative to Black-Box Service Model Adaptation: Yunbei Zhang,

Chengyi Cai,

Feng Liu,

Jihun Hamm; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yunbei and Cai, Chengyi and Liu, Feng and Hamm, Jihun},
  title     = {Prime Once, then Reprogram Locally: An Efficient Alternative to Black-Box Service Model Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6176--6187}
}
Clothe and Pose: Nakul Sharma,

Aayush Bansal,

Minh Vo; [pdf] [supp]
[bibtex]
@InProceedings{Sharma_2026_CVPR,
  author    = {Sharma, Nakul and Bansal, Aayush and Vo, Minh},
  title     = {Clothe and Pose},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2015--2024}
}
FAVE: A Structured Benchmark for Fine-Grained Audio-Visual Temporal Evaluation in Multimodal LLMs: Weiheng Lu,

An Yu,

Jian Li,

Zhenfei Zhang,

Felix X.-F. Ye,

Ming-Ching Chang; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Weiheng and Yu, An and Li, Jian and Zhang, Zhenfei and Ye, Felix X.-F. and Chang, Ming-Ching},
  title     = {{FAVE}: A Structured Benchmark for Fine-Grained Audio-Visual Temporal Evaluation in Multimodal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1651--1660}
}
Bootstrap Dynamic-Aware 3D Visual Representation for Scalable Robot Learning: Qiwei Liang,

Boyang Cai,

Minghao Lai,

Sitong Zhuang,

Tao Lin,

Yan Qin,

Yixuan Ye,

Jiaming Liang,

Renjing Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Qiwei and Cai, Boyang and Lai, Minghao and Zhuang, Sitong and Lin, Tao and Qin, Yan and Ye, Yixuan and Liang, Jiaming and Xu, Renjing},
  title     = {Bootstrap Dynamic-Aware {3D} Visual Representation for Scalable Robot Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13419--13429}
}
DAGE: Dual-Stream Architecture for Efficient and Fine-Grained Geometry Estimation: Tuan Duc Ngo,

Jiahui Huang,

Seoung Wug Oh,

Kevin Blackburn-Matzen,

Evangelos Kalogerakis,

Chuang Gan,

Joon-Young Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ngo_2026_CVPR,
  author    = {Ngo, Tuan Duc and Huang, Jiahui and Oh, Seoung Wug and Blackburn-Matzen, Kevin and Kalogerakis, Evangelos and Gan, Chuang and Lee, Joon-Young},
  title     = {{DAGE}: Dual-Stream Architecture for Efficient and Fine-Grained Geometry Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21701--21712}
}
Proof-of-Perception: Certified Tool-Using Multimodal Reasoning with Compositional Conformal Guarantees: Arya Fayyazi,

Haleh Akrami; [pdf] [arXiv]
[bibtex]
@InProceedings{Fayyazi_2026_CVPR,
  author    = {Fayyazi, Arya and Akrami, Haleh},
  title     = {Proof-of-Perception: Certified Tool-Using Multimodal Reasoning with Compositional Conformal Guarantees},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5144--5153}
}
Generating Humanless Environment Walkthroughs from Egocentric Walking Tour Videos: Yujin Ham,

Junho Kim,

Vivek Boominathan,

Guha Balakrishnan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ham_2026_CVPR,
  author    = {Ham, Yujin and Kim, Junho and Boominathan, Vivek and Balakrishnan, Guha},
  title     = {Generating Humanless Environment Walkthroughs from Egocentric Walking Tour Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4322--4331}
}
POLAR: A Portrait OLAT Dataset and Generative Framework for Illumination-Aware Face Modeling: Zhuo Chen,

Chengqun Yang,

Zhuo Su,

Zheng Lv,

Jingnan Gao,

Xiaoyuan Zhang,

Xiaokang Yang,

Yichao Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhuo and Yang, Chengqun and Su, Zhuo and Lv, Zheng and Gao, Jingnan and Zhang, Xiaoyuan and Yang, Xiaokang and Yan, Yichao},
  title     = {{POLAR}: A Portrait {OLAT} Dataset and Generative Framework for Illumination-Aware Face Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28871--28881}
}
ORIC: Benchmarking Object Recognition under Contextual Incongruity in Large Vision-Language Models: Zhaoyang Li,

Zhan Ling,

Yuchen Zhou,

Litian Gong,

Erdem Biyik,

Hao Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhaoyang and Ling, Zhan and Zhou, Yuchen and Gong, Litian and Biyik, Erdem and Su, Hao},
  title     = {{ORIC}: Benchmarking Object Recognition under Contextual Incongruity in Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23673--23684}
}
2D-LFM: Lifting Foundation Model without 3D Supervision: Mosam Dabhi,

Irhas Gill,

László A. Jeni,

Simon Lucey; [pdf] [supp]
[bibtex]
@InProceedings{Dabhi_2026_CVPR,
  author    = {Dabhi, Mosam and Gill, Irhas and Jeni, L{\'a}szl{\'o} A. and Lucey, Simon},
  title     = {{2D-LFM}: Lifting Foundation Model without {3D} Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34303--34311}
}
QD-PCQA: Quality-Aware Domain Adaptation for Point Cloud Quality Assessment: Guohua Zhang,

Jian Jin,

Meiqin Liu,

Chao Yao,

Weisi Lin; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Guohua and Jin, Jian and Liu, Meiqin and Yao, Chao and Lin, Weisi},
  title     = {{QD-PCQA}: Quality-Aware Domain Adaptation for Point Cloud Quality Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17143--17152}
}
Bi-Bridge: Bidirectional Diffusion Bridges for Low-Light Image Enhancement: Zeyu Hua,

Hui Li,

Yu Wang,

Song Wang,

Congchao Zhu,

Caixia Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Hua_2026_CVPR,
  author    = {Hua, Zeyu and Li, Hui and Wang, Yu and Wang, Song and Zhu, Congchao and Zheng, Caixia},
  title     = {{Bi-Bridge}: Bidirectional Diffusion Bridges for Low-Light Image Enhancement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37455--37464}
}
Gradient Knows Best: Mixed-Precision Quantization via Gradient-Guided Bit Allocation for Super-Resolution: Jun Young Kim,

Joo Hyeon Jeon,

Sangyeon Ahn,

Yoonseo Park,

Yong Seok Oh,

Bogyeong Kim,

Sung In Cho; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jun Young and Jeon, Joo Hyeon and Ahn, Sangyeon and Park, Yoonseo and Oh, Yong Seok and Kim, Bogyeong and Cho, Sung In},
  title     = {Gradient Knows Best: Mixed-Precision Quantization via Gradient-Guided Bit Allocation for Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16355--16364},
}
StaMo: Unsupervised Learning of Generalizable Robot Motion from Compact State Representation: Mingyu Liu,

Jiuhe Shu,

Hui Chen,

Zeju Li,

Canyu Zhao,

Jiange Yang,

Shenyuan Gao,

Hao Chen,

Chunhua Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Mingyu and Shu, Jiuhe and Chen, Hui and Li, Zeju and Zhao, Canyu and Yang, Jiange and Gao, Shenyuan and Chen, Hao and Shen, Chunhua},
  title     = {{StaMo}: Unsupervised Learning of Generalizable Robot Motion from Compact State Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35014--35024},
}
Mind the Way You Select Negative Texts: Pursuing the Distance Consistency in OOD Detection with VLMs: Zhikang Xu,

Qianqian Xu,

Zitai Wang,

Cong Hua,

Sicong Li,

Zhiyong Yang,

Qingming Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zhikang and Xu, Qianqian and Wang, Zitai and Hua, Cong and Li, Sicong and Yang, Zhiyong and Huang, Qingming},
  title     = {Mind the Way You Select Negative Texts: Pursuing the Distance Consistency in {OOD} Detection with {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34650--34661},
}
Pixels Don't Lie (But Your Detector Might): Bootstrapping MLLM-as-a-Judge for Trustworthy Deepfake Detection and Reasoning Supervision: Kartik Kuckreja,

Parul Gupta,

Muhammad Haris Khan,

Abhinav Dhall; [pdf] [supp]
[bibtex]
@InProceedings{Kuckreja_2026_CVPR,
  author    = {Kuckreja, Kartik and Gupta, Parul and Khan, Muhammad Haris and Dhall, Abhinav},
  title     = {Pixels Don't Lie (But Your Detector Might): Bootstrapping {MLLM}-as-a-Judge for Trustworthy Deepfake Detection and Reasoning Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25871--25881},
}
DualPrim: Compact 3D Reconstruction with Positive and Negative Primitives: Xiaoxu Meng,

Zhongmin Chen,

Bo Yang,

Weikai Chen,

Weixiao Liu,

Lin Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meng_2026_CVPR,
  author    = {Meng, Xiaoxu and Chen, Zhongmin and Yang, Bo and Chen, Weikai and Liu, Weixiao and Gao, Lin},
  title     = {{DualPrim}: Compact {3D} Reconstruction with Positive and Negative Primitives},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29081--29091},
}
TagSplat: Topology-Aware Gaussian Splatting for Dynamic Mesh Modeling and Tracking: Hanzhi Guo,

Dongdong Weng,

Mo Su,

Yixiao Chen,

Xiaonuo Dongye,

Chenyu Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Hanzhi and Weng, Dongdong and Su, Mo and Chen, Yixiao and Dongye, Xiaonuo and Xu, Chenyu},
  title     = {{TagSplat}: Topology-Aware {Gaussian} Splatting for Dynamic Mesh Modeling and Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40971--40980},
}
RHCNet: Residual-Guided Hierarchical Calibration Network for Robust Underwater Object Detection: Yueying Wang,

Yiteng Guo,

Weidong Zhang,

Jie Wen,

Liquan Shen,

Huaicheng Yan,

Xin Xu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yueying and Guo, Yiteng and Zhang, Weidong and Wen, Jie and Shen, Liquan and Yan, Huaicheng and Xu, Xin},
  title     = {{RHCNet}: Residual-Guided Hierarchical Calibration Network for Robust Underwater Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4393--4402},
}
VS-Bench: Evaluating VLMs for Strategic Abilities in Multi-Agent Environments: Zelai Xu,

Zhexuan Xu,

Xiangmin Yi,

Huining Yuan,

Mo Guang,

Kaiwen Long,

Xinlei Chen,

Yi Wu,

Chao Yu,

Yu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zelai and Xu, Zhexuan and Yi, Xiangmin and Yuan, Huining and Guang, Mo and Long, Kaiwen and Chen, Xinlei and Wu, Yi and Yu, Chao and Wang, Yu},
  title     = {{VS-Bench}: Evaluating {VLMs} for Strategic Abilities in Multi-Agent Environments},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21561--21572},
}
Machine Unlearning via Adaptive Gradient Reweighting and Multi-stage Objective Optimization: Juxin Lu,

Haoyu Shi,

Mengyao Wang,

Huaiwen Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Juxin and Shi, Haoyu and Wang, Mengyao and Zhang, Huaiwen},
  title     = {Machine Unlearning via Adaptive Gradient Reweighting and Multi-stage Objective Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39403--39413},
}
CURVE: A Benchmark for Cultural and Multilingual Long Video Reasoning: Darshan Singh,

Arsha Nagrani,

Kawshik Manikantan,

Harman Singh,

Dinesh Tewari,

Tobias Weyand,

Cordelia Schmid,

Anelia Angelova,

Shachi Dave; [pdf] [supp]
[bibtex]
@InProceedings{Singh_2026_CVPR,
  author    = {Singh, Darshan and Nagrani, Arsha and Manikantan, Kawshik and Singh, Harman and Tewari, Dinesh and Weyand, Tobias and Schmid, Cordelia and Angelova, Anelia and Dave, Shachi},
  title     = {{CURVE}: A Benchmark for Cultural and Multilingual Long Video Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32860--32871},
}
Diffusion-Based Native Adversarial Synthesis for Enhanced Medical Segmentation Generalization: Hongyu Zhang,

Haipeng Chen,

Zhimin Xu,

Chengxin Yang,

Yingda Lyu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Hongyu and Chen, Haipeng and Xu, Zhimin and Yang, Chengxin and Lyu, Yingda},
  title     = {Diffusion-Based Native Adversarial Synthesis for Enhanced Medical Segmentation Generalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1461--1471},
}
Proxy-Tuning: Tailoring Multimodal Autoregressive Models for Subject-Driven Image Generation: Yi Wu,

Shengju Qian,

Lingting Zhu,

Lei Liu,

Wandi Qiao,

Ziqiang Li,

Lequan Yu,

Bin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yi and Qian, Shengju and Zhu, Lingting and Liu, Lei and Qiao, Wandi and Li, Ziqiang and Yu, Lequan and Li, Bin},
  title     = {{Proxy-Tuning}: Tailoring Multimodal Autoregressive Models for Subject-Driven Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43331--43340},
}
CLIPoint3D: Language-Grounded Few-Shot Unsupervised 3D Point Cloud Domain Adaptation: Mainak Singha,

Sarthak Mehrotra,

Paolo Casari,

Subhasis Chaudhuri,

Elisa Ricci,

Biplab Banerjee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singha_2026_CVPR,
  author    = {Singha, Mainak and Mehrotra, Sarthak and Casari, Paolo and Chaudhuri, Subhasis and Ricci, Elisa and Banerjee, Biplab},
  title     = {{CLIPoint3D}: Language-Grounded Few-Shot Unsupervised {3D} Point Cloud Domain Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9582--9592},
}
Missing No More: Dictionary-Guided Cross-Modal Image Fusion under Missing Infrared: Yafei Zhang,

Meng Ma,

Huafeng Li,

Yu Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yafei and Ma, Meng and Li, Huafeng and Liu, Yu},
  title     = {Missing No More: Dictionary-Guided Cross-Modal Image Fusion under Missing Infrared},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19549--19558},
}
Hyperbolic Defect Feature Synthesis for Few-Shot Defect Classification: Huimin Li,

Boxuan Hu,

Yulin Zhang,

Xiuzhuang Zhou,

Junlin Hu; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Huimin and Hu, Boxuan and Zhang, Yulin and Zhou, Xiuzhuang and Hu, Junlin},
  title     = {Hyperbolic Defect Feature Synthesis for Few-Shot Defect Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19602--19612},
}
TimeLens: Rethinking Video Temporal Grounding with Multimodal LLMs: Jun Zhang,

Teng Wang,

Yuying Ge,

Yixiao Ge,

Xinhao Li,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jun and Wang, Teng and Ge, Yuying and Ge, Yixiao and Li, Xinhao and Wang, Limin},
  title     = {{TimeLens}: Rethinking Video Temporal Grounding with Multimodal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10419--10429},
}
HVG-3D: Bridging Real and Simulation Domains for 3D-Conditional Hand-Object Interaction Video Synthesis: Mingjin Chen,

Junhao Chen,

Zhaoxin Fan,

Yujian Lee,

Zichen Dang,

Lili Wang,

Yawen Cui,

Lap-Pui Chau,

Yi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Mingjin and Chen, Junhao and Fan, Zhaoxin and Lee, Yujian and Dang, Zichen and Wang, Lili and Cui, Yawen and Chau, Lap-Pui and Wang, Yi},
  title     = {{HVG-3D}: Bridging Real and Simulation Domains for {3D}-Conditional Hand-Object Interaction Video Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15986--15997},
}
PropFly: Learning to Propagate via On-the-Fly Supervision from Pre-trained Video Diffusion Models: Wonyong Seo,

Jaeho Moon,

Jaehyup Lee,

Soo Ye Kim,

Munchurl Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Wonyong and Moon, Jaeho and Lee, Jaehyup and Kim, Soo Ye and Kim, Munchurl},
  title     = {{PropFly}: Learning to Propagate via On-the-Fly Supervision from Pre-trained Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43228--43238},
}
Qwen-Image-Layered: Towards Inherent Editability via Layer Decomposition: Shengming Yin,

Zekai Zhang,

Zecheng Tang,

Kaiyuan Gao,

Xiao Xu,

Kun Yan,

Jiahao Li,

Yilei Chen,

Yuxiang Chen,

Heung-Yeung Shum,

Lionel M. Ni,

Junyang Lin,

Chenfei Wu; [pdf] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Shengming and Zhang, Zekai and Tang, Zecheng and Gao, Kaiyuan and Xu, Xiao and Yan, Kun and Li, Jiahao and Chen, Yilei and Chen, Yuxiang and Shum, Heung-Yeung and Ni, Lionel M. and Lin, Junyang and Wu, Chenfei},
  title     = {{Qwen-Image-Layered}: Towards Inherent Editability via Layer Decomposition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16196--16205},
}
LocateAnything3D: Vision-Language 3D Detection with Chain-of-Sight: Yunze Man,

Shihao Wang,

Guowen Zhang,

Johan Bjorck,

Liang-Yan Gui,

Jim Fan,

Jan Kautz,

Yu-Xiong Wang,

Zhiding Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Man_2026_CVPR,
  author    = {Man, Yunze and Wang, Shihao and Zhang, Guowen and Bjorck, Johan and Gui, Liang-Yan and Fan, Jim and Kautz, Jan and Wang, Yu-Xiong and Yu, Zhiding},
  title     = {{LocateAnything3D}: Vision-Language {3D} Detection with Chain-of-Sight},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31089--31102},
}
Dictionary-Aligned Concept Control for Safeguarding Multimodal LLMs: Jinqi Luo,

Jinyu Yang,

Tal Neiman,

Lei Fan,

Bing Yin,

Son Tran,

Mubarak Shah,

René Vidal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Jinqi and Yang, Jinyu and Neiman, Tal and Fan, Lei and Yin, Bing and Tran, Son and Shah, Mubarak and Vidal, Ren{\'e}},
  title     = {Dictionary-Aligned Concept Control for Safeguarding Multimodal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15815--15828},
}
ViBES: A Conversational Agent with Behaviorally-Intelligent 3D Virtual Body: Juze Zhang,

Changan Chen,

Xin Chen,

Heng Yu,

Tiange Xiang,

Ali Sartaz Khan,

Shrinidhi K. Lakshmikanth,

Ehsan Adeli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Juze and Chen, Changan and Chen, Xin and Yu, Heng and Xiang, Tiange and Khan, Ali Sartaz and Lakshmikanth, Shrinidhi K. and Adeli, Ehsan},
  title     = {{ViBES}: A Conversational Agent with Behaviorally-Intelligent {3D} Virtual Body},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39994--40008},
}
Bridging Facial Understanding and Animation via Language Models: Luchuan Song,

Pinxin Liu,

Haiyang Liu,

Zhenchao Jin,

Yolo Yunlong Tang,

Zichong Xu,

Susan Liang,

Jing Bi,

Jason J Corso,

Chenliang Xu; [pdf] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Luchuan and Liu, Pinxin and Liu, Haiyang and Jin, Zhenchao and Tang, Yolo Yunlong and Xu, Zichong and Liang, Susan and Bi, Jing and Corso, Jason J and Xu, Chenliang},
  title     = {Bridging Facial Understanding and Animation via Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17557--17567},
}
HypeVPR: Exploring Hyperbolic Space for Perspective to Equirectangular Visual Place Recognition: Suhan Woo,

Seongwon Lee,

Jinwoo Jang,

Euntai Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Woo_2026_CVPR,
  author    = {Woo, Suhan and Lee, Seongwon and Jang, Jinwoo and Kim, Euntai},
  title     = {{HypeVPR}: Exploring Hyperbolic Space for Perspective to Equirectangular Visual Place Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12183--12192},
}
Image-based Outlier Synthesis With Training Data: Sudarshan Regmi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Regmi_2026_CVPR,
  author    = {Regmi, Sudarshan},
  title     = {Image-based Outlier Synthesis With Training Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39338--39350},
}
VGA: Empowering Aerial-Ground Localization by Visual Geometry Alignment: Tao Jun Lin,

Yujiao Shi,

Hongdong Li; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Tao Jun and Shi, Yujiao and Li, Hongdong},
  title     = {{VGA}: Empowering Aerial-Ground Localization by Visual Geometry Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5409--5420},
}
ProactiveMobile: A Comprehensive Benchmark for Boosting Proactive Intelligence On Mobile Devices: Dezhi Kong,

Zhengzhao Feng,

Qiliang Liang,

Hao Wang,

Haofei Sun,

Changpeng Yang,

Yang Li,

Peng Zhou,

Shuai Nie,

Hongzhen Wang,

Linfeng Zhou,

Hao Jia,

Jiaming Xu,

Runyu Shi,

Ying Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Dezhi and Feng, Zhengzhao and Liang, Qiliang and Wang, Hao and Sun, Haofei and Yang, Changpeng and Li, Yang and Zhou, Peng and Nie, Shuai and Wang, Hongzhen and Zhou, Linfeng and Jia, Hao and Xu, Jiaming and Shi, Runyu and Huang, Ying},
  title     = {{ProactiveMobile}: A Comprehensive Benchmark for Boosting Proactive Intelligence On Mobile Devices},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27503--27513},
}
AwareVLN: Reasoning with Self-awareness for Vision-Language Navigation: Wenxuan Guo,

Xiuwei Xu,

Yichen Liu,

Xiangyu Li,

Hang Yin,

Huangxing Chen,

Wenzhao Zheng,

Jianjiang Feng,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Wenxuan and Xu, Xiuwei and Liu, Yichen and Li, Xiangyu and Yin, Hang and Chen, Huangxing and Zheng, Wenzhao and Feng, Jianjiang and Zhou, Jie and Lu, Jiwen},
  title     = {{AwareVLN}: Reasoning with Self-awareness for Vision-Language Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4065--4075},
}
Thermal-Det: Language-Guided Cross-Modal Distillation for Open-Vocabulary Thermal Object Detection: Yasiru Ranasinghe,

Elim Schenck,

Florence Yellin,

Shuowen Hu,

Christopher Funk,

Vishal M. Patel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ranasinghe_2026_CVPR,
  author    = {Ranasinghe, Yasiru and Schenck, Elim and Yellin, Florence and Hu, Shuowen and Funk, Christopher and Patel, Vishal M.},
  title     = {{Thermal-Det}: Language-Guided Cross-Modal Distillation for Open-Vocabulary Thermal Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34628--34637},
}
Thinking-while-Generating: Interleaving Textual Reasoning throughout Visual Generation: Ziyu Guo,

Renrui Zhang,

Hongyu Li,

Manyuan Zhang,

Xinyan Chen,

Sifan Wang,

Yan Feng,

Peng Pei,

Pheng-Ann Heng; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Ziyu and Zhang, Renrui and Li, Hongyu and Zhang, Manyuan and Chen, Xinyan and Wang, Sifan and Feng, Yan and Pei, Peng and Heng, Pheng-Ann},
  title     = {{Thinking-while-Generating}: Interleaving Textual Reasoning throughout Visual Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26295--26305},
}
Adaptive Spatial-Temporal Window: Unlocking the Potential of Event Cameras in Heterogeneous Velocity Scenarios: Zhipeng Sui,

Haiqing Hao,

Weihua He,

Seng-Hong Lee,

Wenhui Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sui_2026_CVPR,
  author    = {Sui, Zhipeng and Hao, Haiqing and He, Weihua and Lee, Seng-Hong and Wang, Wenhui},
  title     = {Adaptive Spatial-Temporal Window: Unlocking the Potential of Event Cameras in Heterogeneous Velocity Scenarios},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {946--955},
}
DLWM: Dual Latent World Models enable Holistic Gaussian-centric Pre-training in Autonomous Driving: Yiyao Zhu,

Ying Xue,

Haiming Zhang,

Guangfeng Jiang,

Wending Zhou,

Xu Yan,

Jiantao Gao,

Yingjie Cai,

Bingbing Liu,

Zhen Li,

Shaojie Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yiyao and Xue, Ying and Zhang, Haiming and Jiang, Guangfeng and Zhou, Wending and Yan, Xu and Gao, Jiantao and Cai, Yingjie and Liu, Bingbing and Li, Zhen and Shen, Shaojie},
  title     = {{DLWM}: Dual Latent World Models enable Holistic {Gaussian}-centric Pre-training in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39713--39723},
}
Dual-Granularity Memory for Efficient Video Generation: Hongjun Wang,

Lin Liu,

Jianguo Li,

Tao Lin; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Hongjun and Liu, Lin and Li, Jianguo and Lin, Tao},
  title     = {Dual-Granularity Memory for Efficient Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38016--38026},
}
RS-SSM: Refining Forgotten Specifics in State Space Model for Video Semantic Segmentation: Kai Zhu,

Zhenyu Cui,

Zehua Zang,

Jiahuan Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Kai and Cui, Zhenyu and Zang, Zehua and Zhou, Jiahuan},
  title     = {{RS-SSM}: Refining Forgotten Specifics in State Space Model for Video Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10741--10752},
}
VinQA: Visual Elements Interleaved Long-form Answer Generation for Real-World Multimodal Document QA: Young Rok Jang,

Hyesoo Kong,

Kyunghwan An,

Jae Sub Huh,

Gyeonghun KIM,

Stanley Jungkyu Choi; [pdf] [supp]
[bibtex]
@InProceedings{Jang_2026_CVPR,
  author    = {Jang, Young Rok and Kong, Hyesoo and An, Kyunghwan and Huh, Jae Sub and KIM, Gyeonghun and Choi, Stanley Jungkyu},
  title     = {{VinQA}: Visual Elements Interleaved Long-form Answer Generation for Real-World Multimodal Document {QA}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41130--41139},
}
RAG-TP: A General Framework for Vehicle Trajectory Prediction via Retrieval-Augmented Generation: Ziyi Wang,

Yang Zhang,

Guijian Tang,

Chao Zhang,

Shibo Zhang,

Xueqiong Li,

Shaowu Yang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ziyi and Zhang, Yang and Tang, Guijian and Zhang, Chao and Zhang, Shibo and Li, Xueqiong and Yang, Shaowu},
  title     = {{RAG-TP}: A General Framework for Vehicle Trajectory Prediction via Retrieval-Augmented Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24865--24874},
}
Re-Align: Structured Reasoning-guided Alignment for In-Context Image Generation and Editing: Runze He,

Yiji Cheng,

Tiankai Hang,

Zhimin Li,

Yu Xu,

Zijin Yin,

Shiyi Zhang,

Wenxun Dai,

Penghui Du,

Ao Ma,

Chunyu Wang,

Qinglin Lu,

Jizhong Han,

Jiao Dai; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Runze and Cheng, Yiji and Hang, Tiankai and Li, Zhimin and Xu, Yu and Yin, Zijin and Zhang, Shiyi and Dai, Wenxun and Du, Penghui and Ma, Ao and Wang, Chunyu and Lu, Qinglin and Han, Jizhong and Dai, Jiao},
  title     = {{Re-Align}: Structured Reasoning-guided Alignment for In-Context Image Generation and Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9051--9062},
}
A Bit is All You Need! Efficient Video Capture via Single Bit Imaging: Kanchana Vaishnavi Gandikota,

Michael Moeller,

Andreas Kolb,

Bhaskar Choubey,

Paramanand Chandramouli; [pdf] [supp]
[bibtex]
@InProceedings{Gandikota_2026_CVPR,
  author    = {Gandikota, Kanchana Vaishnavi and Moeller, Michael and Kolb, Andreas and Choubey, Bhaskar and Chandramouli, Paramanand},
  title     = {A Bit is All You Need! Efficient Video Capture via Single Bit Imaging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34016--34027},
}
BuildingGPT: Auto-Regressive Building Wireframe Reconstruction Model with Reinforcement Learning: Yuzhou Liu,

Lingjie Zhu,

Hanqiao Ye,

Yujun Liu,

Shangfeng Huang,

Xiang Gao,

Ruisheng Wang,

Shuhan Shen; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuzhou and Zhu, Lingjie and Ye, Hanqiao and Liu, Yujun and Huang, Shangfeng and Gao, Xiang and Wang, Ruisheng and Shen, Shuhan},
  title     = {{BuildingGPT}: Auto-Regressive Building Wireframe Reconstruction Model with Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36400--36410},
}
PositionIC: Unified Position and Identity Consistency for Image Customization: Junjie Hu,

Tianyang Han,

Kai Ma,

Jialin Gao,

Yang Song,

Xianhua He,

Junfeng Luo,

Xiaoming Wei,

Wenqiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Junjie and Han, Tianyang and Ma, Kai and Gao, Jialin and Song, Yang and He, Xianhua and Luo, Junfeng and Wei, Xiaoming and Zhang, Wenqiang},
  title     = {{PositionIC}: Unified Position and Identity Consistency for Image Customization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9139--9148},
}
Beyond Appearance: Camouflaged Object Detection via Geometric Structure: Jinyu Han,

Changguang Wu,

Fuming Sun,

Jinhui Tang; [pdf]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Jinyu and Wu, Changguang and Sun, Fuming and Tang, Jinhui},
  title     = {Beyond Appearance: Camouflaged Object Detection via Geometric Structure},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25830--25840},
}
CLIP Is Shortsighted: Paying Attention Beyond the First Sentence: Marc-Antoine Lavoie,

Anas Mahmoud,

Aldo Zaimi,

Arsene Fansi Tchango,

Steven L. Waslander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lavoie_2026_CVPR,
  author    = {Lavoie, Marc-Antoine and Mahmoud, Anas and Zaimi, Aldo and Tchango, Arsene Fansi and Waslander, Steven L.},
  title     = {{CLIP} Is Shortsighted: Paying Attention Beyond the First Sentence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9524--9534},
}
Do Vision-Language Models Leak What They Learn? Adaptive Token-Weighted Model Inversion Attacks: Ngoc-Bao Nguyen,

Sy-Tuyen Ho,

Koh Jun Hao,

Ngai-Man Cheung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Ngoc-Bao and Ho, Sy-Tuyen and Hao, Koh Jun and Cheung, Ngai-Man},
  title     = {Do Vision-Language Models Leak What They Learn? Adaptive Token-Weighted Model Inversion Attacks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10283--10292},
}
Unlocking the Power of Critical Factors for 3D Visual Geometry Estimation: Guangkai Xu,

Hua Geng,

Huanyi Zheng,

Songyi Yin,

Yanlong Sun,

Hao Chen,

Chunhua Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Guangkai and Geng, Hua and Zheng, Huanyi and Yin, Songyi and Sun, Yanlong and Chen, Hao and Shen, Chunhua},
  title     = {Unlocking the Power of Critical Factors for {3D} Visual Geometry Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28979--28989},
}
No Need For Real Anomaly: MLLM Empowered Zero-Shot Video Anomaly Detection: Zunkai Dai,

Ke Li,

Jiajia Liu,

Jie Yang,

Yuanyuan Qiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Zunkai and Li, Ke and Liu, Jiajia and Yang, Jie and Qiao, Yuanyuan},
  title     = {No Need For Real Anomaly: {MLLM} Empowered Zero-Shot Video Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35648--35658},
}
LottieGPT: Tokenizing Vector Animation for Autoregressive Generation: Junhao Chen,

Kejun Gao,

Yuehan Cui,

Mingze Sun,

Mingjin Chen,

Shaohui Wang,

Xiaoxiao Long,

Fei Ma,

Qi Tian,

Hao Zhao,

Ruqi Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Junhao and Gao, Kejun and Cui, Yuehan and Sun, Mingze and Chen, Mingjin and Wang, Shaohui and Long, Xiaoxiao and Ma, Fei and Tian, Qi and Zhao, Hao and Huang, Ruqi},
  title     = {{LottieGPT}: Tokenizing Vector Animation for Autoregressive Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31639--31651},
}
Revisiting Geometric Obfuscation with Dual Convergent Lines for Privacy-Preserving Image Queries in Visual Localization: Jeonggon Kim,

Heejoon Moon,

Je Hyeong Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jeonggon and Moon, Heejoon and Hong, Je Hyeong},
  title     = {Revisiting Geometric Obfuscation with Dual Convergent Lines for Privacy-Preserving Image Queries in Visual Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {113--123},
}
Matching Every Pair to Track Every Point: PairFormer for All-Pairs Tracking and Video Trajectory Fields: Guangyang Wu,

Youran Ding,

Xinyu Che,

Benyuan Sun,

Yi Yang,

Xiaohong Liu; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Guangyang and Ding, Youran and Che, Xinyu and Sun, Benyuan and Yang, Yi and Liu, Xiaohong},
  title     = {Matching Every Pair to Track Every Point: {PairFormer} for All-Pairs Tracking and Video Trajectory Fields},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35187--35196},
}
Upsample Anything: A Simple and Hard to Beat Baseline for Feature Upsampling: Minseok Seo,

Mark Hamilton,

Changick Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Minseok and Hamilton, Mark and Kim, Changick},
  title     = {Upsample Anything: A Simple and Hard to Beat Baseline for Feature Upsampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29707--29716},
}
Sparsity-Aware Voxel Attention and Foreground Modulation for 3D Semantic Scene Completion: Yu Xue,

Longjun Gao,

Yuanqi Su,

HaoAng Lu,

Xiaoning Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR,
  author    = {Xue, Yu and Gao, Longjun and Su, Yuanqi and Lu, HaoAng and Zhang, Xiaoning},
  title     = {Sparsity-Aware Voxel Attention and Foreground Modulation for {3D} Semantic Scene Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5751--5761},
}
ShapeR: Robust Conditional 3D Shape Generation from Casual Captures: Yawar Siddiqui,

Duncan Frost,

Samir Aroudj,

Armen Avetisyan,

Henry Howard-Jenkins,

Daniel DeTone,

Pierre Moulon,

Qirui Wu,

Zhengqin Li,

Julian Straub,

Richard Newcombe,

Jakob Engel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Siddiqui_2026_CVPR,
  author    = {Siddiqui, Yawar and Frost, Duncan and Aroudj, Samir and Avetisyan, Armen and Howard-Jenkins, Henry and DeTone, Daniel and Moulon, Pierre and Wu, Qirui and Li, Zhengqin and Straub, Julian and Newcombe, Richard and Engel, Jakob},
  title     = {{ShapeR}: Robust Conditional {3D} Shape Generation from Casual Captures},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27157--27168},
}
Exploring Conditions for Diffusion Models in Robotic Control: Heeseong Shin,

Byeongho Heo,

Dongyoon Han,

Seungryong Kim,

Taekyung Kim; [pdf] [arXiv]
[bibtex]
@InProceedings{Shin_2026_CVPR,
  author    = {Shin, Heeseong and Heo, Byeongho and Han, Dongyoon and Kim, Seungryong and Kim, Taekyung},
  title     = {Exploring Conditions for Diffusion Models in Robotic Control},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27967--27977},
}
TV2TV: A Unified Framework for Interleaved Language and Video Generation: Xiaochuang Han,

Youssef Emad,

Melissa Hall,

John Nguyen,

Karthik Padthe,

Liam Robbins,

Amir Bar,

Delong Chen,

Michal Drozdzal,

Maha Elbayad,

Yushi Hu,

Shang-Wen Li,

Jakob Verbeek,

XuDong Wang,

Marjan Ghazvininejad,

Luke Zettlemoyer,

Emily Dinan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Xiaochuang and Emad, Youssef and Hall, Melissa and Nguyen, John and Padthe, Karthik and Robbins, Liam and Bar, Amir and Chen, Delong and Drozdzal, Michal and Elbayad, Maha and Hu, Yushi and Li, Shang-Wen and Verbeek, Jakob and Wang, XuDong and Ghazvininejad, Marjan and Zettlemoyer, Luke and Dinan, Emily},
  title     = {{TV2TV}: A Unified Framework for Interleaved Language and Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7695--7706},
}
Pose-Free Omnidirectional Gaussian Splatting for 360-Degree Videos with Consistent Depth Priors: Chuanqing Zhuang,

Xin Lu,

Zehui Deng,

Zhengda Lu,

Yiqun Wang,

Junqi Diao,

Jun Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuang_2026_CVPR,
  author    = {Zhuang, Chuanqing and Lu, Xin and Deng, Zehui and Lu, Zhengda and Wang, Yiqun and Diao, Junqi and Xiao, Jun},
  title     = {Pose-Free Omnidirectional {Gaussian} Splatting for 360-Degree Videos with Consistent Depth Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4881--4890},
}
LESA: Learnable Stage-Aware Predictors for Diffusion Model Acceleration: Peiliang Cai,

Jiacheng Liu,

Haowen Xu,

Xinyu Wang,

Chang Zou,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Peiliang and Liu, Jiacheng and Xu, Haowen and Wang, Xinyu and Zou, Chang and Zhang, Linfeng},
  title     = {{LESA}: Learnable Stage-Aware Predictors for Diffusion Model Acceleration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43300--43309},
}
ObjectMorpher: 3D-Aware Image Editing via Deformable 3DGS: Yuhuan Xie,

Aoxuan Pan,

Yi-Hua Huang,

Chirui Chang,

Peng Dai,

Xin Yu,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yuhuan and Pan, Aoxuan and Huang, Yi-Hua and Chang, Chirui and Dai, Peng and Yu, Xin and Qi, Xiaojuan},
  title     = {{ObjectMorpher}: {3D}-Aware Image Editing via Deformable {3DGS}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5828--5838},
}
Residual Connections Harm Generative Representation Learning: Xiao Zhang,

Ruoxi Jiang,

William Gao,

Rebecca Willet,

Michael Maire; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xiao and Jiang, Ruoxi and Gao, William and Willet, Rebecca and Maire, Michael},
  title     = {Residual Connections Harm Generative Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39669--39679},
}
Learning What Matters: Prioritized Concept Learning via Relative Error-driven Sample Selection: Qian Yang,

Shivam Chandhok,

Oscar Mañas,

Kanishk Jain,

Aishwarya Agrawal,

Leonid Sigal; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Qian and Chandhok, Shivam and Ma{\~n}as, Oscar and Jain, Kanishk and Agrawal, Aishwarya and Sigal, Leonid},
  title     = {Learning What Matters: Prioritized Concept Learning via Relative Error-driven Sample Selection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15794--15804},
}
SANER: Switchable Adapter with Non-parametric Enhanced Routing for Person De-Reidentification: Yimin Liu,

Nan Pu,

Fengxiang Yang,

Wenjing Li,

Zhihui Li,

Zhun Zhong; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yimin and Pu, Nan and Yang, Fengxiang and Li, Wenjing and Li, Zhihui and Zhong, Zhun},
  title     = {{SANER}: Switchable Adapter with Non-parametric Enhanced Routing for Person De-Reidentification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40376--40385},
}
Learning to Focus and Precise Cropping: A Reinforcement Learning Framework with Information Gaps and Grounding Loss for MLLMs: Xuanpu Zhao,

Zhentao Tan,

Dianmo Sheng,

Tianxiang Chen,

Yao Liu,

Yue Wu,

Tao Gong,

Qi Chu,

Nenghai Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Xuanpu and Tan, Zhentao and Sheng, Dianmo and Chen, Tianxiang and Liu, Yao and Wu, Yue and Gong, Tao and Chu, Qi and Yu, Nenghai},
  title     = {Learning to Focus and Precise Cropping: A Reinforcement Learning Framework with Information Gaps and Grounding Loss for {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25938--25947},
}
SeaCache: Spectral-Evolution-Aware Cache for Accelerating Diffusion Models: Jiwoo Chung,

Sangeek Hyun,

MinKyu Lee,

Byeongju Han,

Geonho Cha,

Dongyoon Wee,

Youngjun Hong,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chung_2026_CVPR,
  author    = {Chung, Jiwoo and Hyun, Sangeek and Lee, MinKyu and Han, Byeongju and Cha, Geonho and Wee, Dongyoon and Hong, Youngjun and Heo, Jae-Pil},
  title     = {{SeaCache}: Spectral-Evolution-Aware Cache for Accelerating Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14283--14294},
}
SafeRoPE: Risk-specific Head-wise Embedding Rotation for Safe Generation in Rectified Flow Transformers: Xiang Yang,

Feifei Li,

Mi Zhang,

Geng Hong,

Xiaoyu You,

Min Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xiang and Li, Feifei and Zhang, Mi and Hong, Geng and You, Xiaoyu and Yang, Min},
  title     = {{SafeRoPE}: Risk-specific Head-wise Embedding Rotation for Safe Generation in Rectified Flow Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {690--700},
}
Self-Paced and Self-Corrective Masked Prediction for Movie Trailer Generation: Sidan Zhu,

Hongteng Xu,

Dixin Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Sidan and Xu, Hongteng and Luo, Dixin},
  title     = {Self-Paced and Self-Corrective Masked Prediction for Movie Trailer Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7684--7694},
}
LangRef3DGS: Natural Language-Guided 3D Referential Segmentation from Partial Observations via 3D Gaussian Splatting: Xulun Ye,

Qin Zhang,

Kun Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Xulun and Zhang, Qin and Zhou, Kun},
  title     = {{LangRef3DGS}: Natural Language-Guided {3D} Referential Segmentation from Partial Observations via {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38595--38605},
}
GenErase: Generalizable and Semantically-Aware Concept Erasure in Diffusion Models: Korada Sri Vardhana,

Soma Biswas; [pdf] [supp]
[bibtex]
@InProceedings{Vardhana_2026_CVPR,
  author    = {Vardhana, Korada Sri and Biswas, Soma},
  title     = {{GenErase}: Generalizable and Semantically-Aware Concept Erasure in Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2327--2335},
}
RawMetaDiff: Unlocking Extreme Darkness from Dual-Exposure RAW with Meta-Guided Diffusion: Panjun Liu,

Jiyuan Xia,

Yuanshen Guan,

Yong Li,

Zhiqiang Lang,

Ruikang Xu,

Chang Chen,

Dehua Song,

Fenglong Song,

Zhiwei Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Panjun and Xia, Jiyuan and Guan, Yuanshen and Li, Yong and Lang, Zhiqiang and Xu, Ruikang and Chen, Chang and Song, Dehua and Song, Fenglong and Xiong, Zhiwei},
  title     = {{RawMetaDiff}: Unlocking Extreme Darkness from Dual-Exposure {RAW} with Meta-Guided Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5617--5626},
}
VideoRealBench: A Chain-of-Thought Realism Evaluation Benchmark for Generated Human-Centric Videos: Min Yang,

Xinwen Zhang,

Jialei Tang,

Xin Zhou,

Kehan Li,

Zeyi Huang,

Limin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Min and Zhang, Xinwen and Tang, Jialei and Zhou, Xin and Li, Kehan and Huang, Zeyi and Wang, Limin},
  title     = {{VideoRealBench}: A Chain-of-Thought Realism Evaluation Benchmark for Generated Human-Centric Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18565--18575},
}
Premier: Personalized Preference Modulation with Learnable User Embedding in Text-to-Image Generation: Zihao Wang,

Yuxiang Wei,

Xinpeng Zhou,

Tianyu Zhang,

Tao Liang,

Yalong Bai,

Hongzhi Zhang,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zihao and Wei, Yuxiang and Zhou, Xinpeng and Zhang, Tianyu and Liang, Tao and Bai, Yalong and Zhang, Hongzhi and Zuo, Wangmeng},
  title     = {Premier: Personalized Preference Modulation with Learnable User Embedding in Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29146--29156},
}
NimbusGS: Unified 3D Scene Reconstruction under Hybrid Weather: Yanying Li,

Jinyang Li,

Shengfeng He,

Yangyang Xu,

Junyu Dong,

Yong Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yanying and Li, Jinyang and He, Shengfeng and Xu, Yangyang and Dong, Junyu and Du, Yong},
  title     = {{NimbusGS}: Unified {3D} Scene Reconstruction under Hybrid Weather},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5038--5048},
}
Computer Vision with a Superpixelation Camera: Sasidharan Mahalingam,

Rachel Brown,

Atul Ingle; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mahalingam_2026_CVPR,
  author    = {Mahalingam, Sasidharan and Brown, Rachel and Ingle, Atul},
  title     = {Computer Vision with a Superpixelation Camera},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41773--41783},
}
Elastic3D: Controllable Stereo Video Conversion with Guided Latent Decoding: Nando Metzger,

Prune Truong,

Goutam Bhat,

Konrad Schindler,

Federico Tombari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Metzger_2026_CVPR,
  author    = {Metzger, Nando and Truong, Prune and Bhat, Goutam and Schindler, Konrad and Tombari, Federico},
  title     = {{Elastic3D}: Controllable Stereo Video Conversion with Guided Latent Decoding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32693--32703},
}
gQIR: Generative Quanta Image Reconstruction: Aryan Garg,

Sizhuo Ma,

Mohit Gupta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Garg_2026_CVPR,
  author    = {Garg, Aryan and Ma, Sizhuo and Gupta, Mohit},
  title     = {{gQIR}: Generative Quanta Image Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19759--19770},
}
DuetMerging: Synergizing Dynamic and Static Strategies for Mitigating Task Interference in Model Merging: Yan Li,

Guiping Cao,

Yaguang Song,

Ming Tao,

Haoran Gong,

Junhui Liu,

Yaowei Wang,

Dongmei Jiang; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yan and Cao, Guiping and Song, Yaguang and Tao, Ming and Gong, Haoran and Liu, Junhui and Wang, Yaowei and Jiang, Dongmei},
  title     = {{DuetMerging}: Synergizing Dynamic and Static Strategies for Mitigating Task Interference in Model Merging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41954--41963},
}
MatSpray: Fusing 2D Material World Knowledge on 3D Geometry: Philipp Langsteiner,

Jan-Niklas Dihlmann,

Hendrik Lensch; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Langsteiner_2026_CVPR,
  author    = {Langsteiner, Philipp and Dihlmann, Jan-Niklas and Lensch, Hendrik},
  title     = {{MatSpray}: Fusing {2D} Material World Knowledge on {3D} Geometry},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22615--22625},
}
ViRC: Enhancing Visual Interleaved Mathematical CoT with Reason Chunking: Lihong Wang,

Liangqi Li,

Weiwei Feng,

Jiamin Wu,

Changtao Miao,

Tieru Wu,

Rui Ma,

Bo Zhang,

Zhe Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Lihong and Li, Liangqi and Feng, Weiwei and Wu, Jiamin and Miao, Changtao and Wu, Tieru and Ma, Rui and Zhang, Bo and Li, Zhe},
  title     = {{ViRC}: Enhancing Visual Interleaved Mathematical {CoT} with Reason Chunking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26144--26153},
}
FinPercep-RM: A Fine-grained Reward Model and Co-evolutionary Curriculum for RL-based Real-world Super-Resolution: Yidi Liu,

Zihao Fan,

Jie Huang,

Jie Xiao,

Dong Li,

Wenlong Zhang,

Lei Bai,

Xueyang Fu,

Zheng-jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yidi and Fan, Zihao and Huang, Jie and Xiao, Jie and Li, Dong and Zhang, Wenlong and Bai, Lei and Fu, Xueyang and Zha, Zheng-jun},
  title     = {{FinPercep-RM}: A Fine-grained Reward Model and Co-evolutionary Curriculum for {RL}-based Real-world Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4839--4849},
}
UniTEX: Universal High Fidelity Generative Texturing for 3D Shapes: Yixun Liang,

Kunming Luo,

Xiao Chen,

Rui Chen,

Hongyu Yan,

Weiyu Li,

Jiarui Liu,

Fei-Peng Tian,

Ping Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Yixun and Luo, Kunming and Chen, Xiao and Chen, Rui and Yan, Hongyu and Li, Weiyu and Liu, Jiarui and Tian, Fei-Peng and Tan, Ping},
  title     = {{UniTEX}: Universal High Fidelity Generative Texturing for {3D} Shapes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19917--19927},
}
ARM-Thinker: Reinforcing Multimodal Generative Reward Models with Agentic Tool Use and Visual Reasoning: Shengyuan Ding,

Xinyu Fang,

Ziyu Liu,

Yuhang Zang,

Yuhang Cao,

Xiangyu Zhao,

Haodong Duan,

Xiaoyi Dong,

Jianze Liang,

Bin Wang,

Conghui He,

Dahua Lin,

Jiaqi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Shengyuan and Fang, Xinyu and Liu, Ziyu and Zang, Yuhang and Cao, Yuhang and Zhao, Xiangyu and Duan, Haodong and Dong, Xiaoyi and Liang, Jianze and Wang, Bin and He, Conghui and Lin, Dahua and Wang, Jiaqi},
  title     = {{ARM-Thinker}: Reinforcing Multimodal Generative Reward Models with Agentic Tool Use and Visual Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22195--22205},
}
Denoising as Path Planning: Training-Free Acceleration of Diffusion Models with DPCache: Bowen Cui,

Yuanbin Wang,

Huajiang Xu,

Biaolong Chen,

Aixi Zhang,

Hao Jiang,

Zhengzheng Jin,

Xu Liu,

Pipei Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Bowen and Wang, Yuanbin and Xu, Huajiang and Chen, Biaolong and Zhang, Aixi and Jiang, Hao and Jin, Zhengzheng and Liu, Xu and Huang, Pipei},
  title     = {Denoising as Path Planning: Training-Free Acceleration of Diffusion Models with {DPCache}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43632--43642},
}
GPFlow: Gaussian Prototype Probability Flow for Unsupervised Multi-Modal Anomaly Detection: Yiting Li,

Xulei Yang,

Jingyi Liao,

Jing Zhang,

Fayao Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yiting and Yang, Xulei and Liao, Jingyi and Zhang, Jing and Liu, Fayao},
  title     = {{GPFlow}: {Gaussian} Prototype Probability Flow for Unsupervised Multi-Modal Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43103--43112},
}
MVLM: Template-Free Tracking via Vision-Language Margin Confidence and Memory-Gated Tracking: Dae-Hyeon Park,

Mina Baek,

Jeong-Hun Ha,

Chan-Seop Park,

Jamshidjon Ganiev,

Seung-Hwan Bae; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Dae-Hyeon and Baek, Mina and Ha, Jeong-Hun and Park, Chan-Seop and Ganiev, Jamshidjon and Bae, Seung-Hwan},
  title     = {{MVLM}: Template-Free Tracking via Vision-Language Margin Confidence and Memory-Gated Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35156--35165},
}
Machine Mental Imagery: Empower Multimodal Reasoning with Latent Visual Tokens: Zeyuan Yang,

Xueyang Yu,

Delin Chen,

Maohao Shen,

Chuang Gan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zeyuan and Yu, Xueyang and Chen, Delin and Shen, Maohao and Gan, Chuang},
  title     = {Machine Mental Imagery: Empower Multimodal Reasoning with Latent Visual Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33510--33520},
}
InTrain: Intrinsic Trainability for Zero-Cost Neural Architecture Search: Qinqin Zhou,

Fuhai Chen,

Jipeng Wu,

Zhiwei Chen,

Zhikai Hu,

Weiwei Cai; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Qinqin and Chen, Fuhai and Wu, Jipeng and Chen, Zhiwei and Hu, Zhikai and Cai, Weiwei},
  title     = {{InTrain}: Intrinsic Trainability for Zero-Cost Neural Architecture Search},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20181--20190},
}
Flow Map Distillation Without Data: Shangyuan Tong,

Nanye Ma,

Saining Xie,

Tommi Jaakkola; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tong_2026_CVPR,
  author    = {Tong, Shangyuan and Ma, Nanye and Xie, Saining and Jaakkola, Tommi},
  title     = {Flow Map Distillation Without Data},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33973--33984},
}
Unified Vector Floorplan Generation via Markup Representation: Kaede Shiohara,

Toshihiko Yamasaki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shiohara_2026_CVPR,
  author    = {Shiohara, Kaede and Yamasaki, Toshihiko},
  title     = {Unified Vector Floorplan Generation via Markup Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39262--39271},
}
Contrastive Cross-Bag Augmentation for Multiple Instance Learning-based Whole Slide Image Classification: Bo Zhang,

Xinan Xu,

Shuo Yan,

Yu Bai,

Zheng Zhang,

Wufan Wang,

Hui Gao,

Wendong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Bo and Xu, Xinan and Yan, Shuo and Bai, Yu and Zhang, Zheng and Wang, Wufan and Gao, Hui and Wang, Wendong},
  title     = {Contrastive Cross-Bag Augmentation for Multiple Instance Learning-based Whole Slide Image Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21089--21098},
}
Cluster-aware Anchor Learning for Multi-View Clustering: Zhe Chen,

Fanhui Meng,

Tianyang Xu,

Xiao-Jun Wu; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhe and Meng, Fanhui and Xu, Tianyang and Wu, Xiao-Jun},
  title     = {Cluster-aware Anchor Learning for Multi-View Clustering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17714--17723},
}
Learning from Synthetic Data via Provenance-Based Input Gradient Guidance: Koshiro Nagano,

Ryo Fujii,

Ryo Hachiuma,

Fumiaki Sato,

Taiki Sekii,

Hideo Saito; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nagano_2026_CVPR,
  author    = {Nagano, Koshiro and Fujii, Ryo and Hachiuma, Ryo and Sato, Fumiaki and Sekii, Taiki and Saito, Hideo},
  title     = {Learning from Synthetic Data via Provenance-Based Input Gradient Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18796--18805},
}
Discover, Segment, and Select: A Progressive Mechanism for Zero-shot Camouflaged Object Segmentation: Yilong Yang,

Jianxin Tian,

Shengchuan Zhang,

Liujuan Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yilong and Tian, Jianxin and Zhang, Shengchuan and Cao, Liujuan},
  title     = {Discover, Segment, and Select: A Progressive Mechanism for Zero-shot Camouflaged Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34745--34754},
}
MAGICIAN: Efficient Long-Term Planning with Imagined Gaussians for Active Mapping: Shiyao Li,

Antoine Guédon,

Shizhe Chen,

Vincent Lepetit; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Shiyao and Gu{\'e}don, Antoine and Chen, Shizhe and Lepetit, Vincent},
  title     = {{MAGICIAN}: Efficient Long-Term Planning with Imagined {Gaussians} for Active Mapping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21606--21615},
}
Fast3Dcache: Training-free 3D Geometry Synthesis Acceleration: Mengyu Yang,

Yanming Yang,

Chenyi Xu,

Chenxi Song,

Yufan Zuo,

Tong Zhao,

Ruibo Li,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Mengyu and Yang, Yanming and Xu, Chenyi and Song, Chenxi and Zuo, Yufan and Zhao, Tong and Li, Ruibo and Zhang, Chi},
  title     = {{Fast3Dcache}: Training-free {3D} Geometry Synthesis Acceleration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27030--27040},
}
PrivateEyes: Gaze-Preserving Anonymization for Data Sharing: Surabhi Gupta,

Dinesh Prabhu Muthumariappan,

Biplab Das,

Anoop Kolar Rajagopal,

Kiran Nanjunda Iyer,

Donghwan Seo; [pdf] [supp]
[bibtex]
@InProceedings{Gupta_2026_CVPR,
  author    = {Gupta, Surabhi and Muthumariappan, Dinesh Prabhu and Das, Biplab and Rajagopal, Anoop Kolar and Iyer, Kiran Nanjunda and Seo, Donghwan},
  title     = {{PrivateEyes}: Gaze-Preserving Anonymization for Data Sharing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3274--3283},
}
ForeAct: Steering Your VLA with Efficient Visual Foresight Planning: Zhuoyang Zhang,

Shang Yang,

Qinghao Hu,

Luke J. Huang,

James Hou,

Yufei Sun,

Yao Lu,

Song Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zhuoyang and Yang, Shang and Hu, Qinghao and Huang, Luke J. and Hou, James and Sun, Yufei and Lu, Yao and Han, Song},
  title     = {{ForeAct}: Steering Your {VLA} with Efficient Visual Foresight Planning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37195--37205},
}
RAPID: Reusing Attention Sparsity with Inter-step Adaptation for Efficient Video Diffusion: Shangran Lin,

Lu Lu,

Jian Chen,

Qiang Liu; [pdf]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Shangran and Lu, Lu and Chen, Jian and Liu, Qiang},
  title     = {{RAPID}: Reusing Attention Sparsity with Inter-step Adaptation for Efficient Video Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36147--36156},
}
All Vehicles Can Lie: Efficient Adversarial Defense in Fully Untrusted-Vehicle Collaborative Perception via Pseudo-Random Bayesian Inference: Yi Yu,

Libing Wu,

Zhuangzhuang Zhang,

Jing Qiu,

Lijuan Huo,

Jiaqi Feng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Yi and Wu, Libing and Zhang, Zhuangzhuang and Qiu, Jing and Huo, Lijuan and Feng, Jiaqi},
  title     = {All Vehicles Can Lie: Efficient Adversarial Defense in Fully Untrusted-Vehicle Collaborative Perception via Pseudo-Random {Bayesian} Inference},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6549--6558},
}
C-LaV: Conditional Latent Velocity Field Denoising for Weather-Robust LiDAR Place Recognition: Xuewei Cao,

Jiayue Yang,

Zhiwen Zeng,

Yanyong Zhang,

Yan Xia; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Xuewei and Yang, Jiayue and Zeng, Zhiwen and Zhang, Yanyong and Xia, Yan},
  title     = {{C-LaV}: Conditional Latent Velocity Field Denoising for Weather-Robust {LiDAR} Place Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2928--2937},
}
Uncertainty-Aware Knowledge Distillation for Multimodal Large Language Models: Jingchen Sun,

Shaobo Han,

Deep Patel,

Wataru Kohno,

Can Jin,

Changyou Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Jingchen and Han, Shaobo and Patel, Deep and Kohno, Wataru and Jin, Can and Chen, Changyou},
  title     = {Uncertainty-Aware Knowledge Distillation for Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5585--5595},
}
Adaptive Auxiliary Prompt Blending for Target-Faithful Diffusion Generation: Kwanyoung Lee,

SeungJu Cha,

Yebin Ahn,

Hyunwoo Oh,

Sungho Koh,

Dong-Jin Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Kwanyoung and Cha, SeungJu and Ahn, Yebin and Oh, Hyunwoo and Koh, Sungho and Kim, Dong-Jin},
  title     = {Adaptive Auxiliary Prompt Blending for Target-Faithful Diffusion Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43707--43716},
}
Time-Aware One Step Diffusion Network for Real-World Image Super-Resolution: Tianyi Zhang,

Zheng-Peng Duan,

Chun-Le Guo,

Peng-Tao Jiang,

Bo Li,

Ming-Ming Cheng,

Chongyi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Tianyi and Duan, Zheng-Peng and Guo, Chun-Le and Jiang, Peng-Tao and Li, Bo and Cheng, Ming-Ming and Li, Chongyi},
  title     = {Time-Aware One Step Diffusion Network for Real-World Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30543--30552},
}
Fast Markov Random Field Optimisation for Topologically Noisy 3D Shape Matching: Paul Roetzer,

Johan Thunberg,

Zorah Lähner,

Florian Bernard; [pdf] [supp]
[bibtex]
@InProceedings{Roetzer_2026_CVPR,
  author    = {Roetzer, Paul and Thunberg, Johan and L{\"a}hner, Zorah and Bernard, Florian},
  title     = {Fast {Markov} Random Field Optimisation for Topologically Noisy {3D} Shape Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24162--24172},
}
Medic-AD: Towards Medical Vision-Language Model's Clinical Intelligence: Woohyeon Park,

Jaeik Kim,

Sunghwan Steve Cho,

Pa Hong,

Wookyoung Jeong,

Yoojin Nam,

Namjoon Kim,

Ginny Y. Wong,

Ka Chun Cheung,

Jaeyoung Do; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Woohyeon and Kim, Jaeik and Cho, Sunghwan Steve and Hong, Pa and Jeong, Wookyoung and Nam, Yoojin and Kim, Namjoon and Wong, Ginny Y. and Cheung, Ka Chun and Do, Jaeyoung},
  title     = {{Medic-AD}: Towards Medical Vision-Language Model's Clinical Intelligence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36321--36331},
}
MorphSeek: Fine-grained Latent Representation-Level Policy Optimization for Deformable Image Registration: Runxun Zhang,

Yizhou Liu,

Dongrui Li,

Bo Xu,

Jingwei Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Runxun and Liu, Yizhou and Li, Dongrui and Xu, Bo and Wei, Jingwei},
  title     = {{MorphSeek}: Fine-grained Latent Representation-Level Policy Optimization for Deformable Image Registration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27460--27470},
}
DVAR: Dynamic Visual Autoregressive Modeling for Image Super-Resolution: Yu Zheng,

Kai Zhang,

Wei Zhu,

Qingguo Liu,

Xiantao Hu,

Jun Li,

Jian Yang; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Yu and Zhang, Kai and Zhu, Wei and Liu, Qingguo and Hu, Xiantao and Li, Jun and Yang, Jian},
  title     = {{DVAR}: Dynamic Visual Autoregressive Modeling for Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23378--23387},
}
GUI-SAGE: Enhancing GUI Automation with Self-Explanatory Learning: Fei Tang,

Zhangxuan Gu,

Zhengxi Lu,

Shangzhan Zhang,

Zhengwen Zeng,

Shuheng Shen,

Changhua Meng,

Yuchen Yan,

Wenqi Zhang,

Yongliang Shen,

Weiming Lu,

Yueting Zhuang; [pdf]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Fei and Gu, Zhangxuan and Lu, Zhengxi and Zhang, Shangzhan and Zeng, Zhengwen and Shen, Shuheng and Meng, Changhua and Yan, Yuchen and Zhang, Wenqi and Shen, Yongliang and Lu, Weiming and Zhuang, Yueting},
  title     = {{GUI-SAGE}: Enhancing {GUI} Automation with Self-Explanatory Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13007--13016},
}
3DrawAgent: Teaching LLM to Draw in 3D with Early Contrastive Experience: Hongcan Xiao,

Xinyue Xiao,

Yilin Wang,

Yue Zhang,

Yonggang Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR, author = {Xiao, Hongcan and Xiao, Xinyue and Wang, Yilin and Zhang, Yue and Qi, Yonggang}, title = {3DrawAgent: Teaching LLM to Draw in 3D with Early Contrastive Experience}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27179-27187} }
RAM: Recover Any 3D Human Motion in-the-Wild: Sen Jia,

Ning Zhu,

Jinqin Zhong,

Jiale Zhou,

Huaping Zhang,

Jenq-Neng Hwang,

Lei Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR, author = {Jia, Sen and Zhu, Ning and Zhong, Jinqin and Zhou, Jiale and Zhang, Huaping and Hwang, Jenq-Neng and Li, Lei}, title = {RAM: Recover Any 3D Human Motion in-the-Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42789-42799} }
GaussianGrow: Geometry-aware Gaussian Growing from 3D Point Clouds with Text Guidance: Weiqi Zhang,

Junsheng Zhou,

Haotian Geng,

Kanle Shi,

Shenkun Xu,

Yi Fang,

Yu-Shen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Weiqi and Zhou, Junsheng and Geng, Haotian and Shi, Kanle and Xu, Shenkun and Fang, Yi and Liu, Yu-Shen}, title = {GaussianGrow: Geometry-aware Gaussian Growing from 3D Point Clouds with Text Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18968-18979} }
REVIVE 3D: Refinement via Encoded Voluminous Inflated prior for Volume Enhancement: Hankyeol Lee,

Wooyeol Baek,

Seongdo Kim,

Jongyoo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR, author = {Lee, Hankyeol and Baek, Wooyeol and Kim, Seongdo and Kim, Jongyoo}, title = {REVIVE 3D: Refinement via Encoded Voluminous Inflated prior for Volume Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26984-26994} }
UniFusion: A Unified Image Fusion Framework with Robust Representation and Source-Aware Preservation: Xingyuan Li,

Songcheng Du,

Yang Zou,

Haoyuan Xu,

Zhiying Jiang,

Jinyuan Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Xingyuan and Du, Songcheng and Zou, Yang and Xu, Haoyuan and Jiang, Zhiying and Liu, Jinyuan}, title = {UniFusion: A Unified Image Fusion Framework with Robust Representation and Source-Aware Preservation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33869-33880} }
GazeShift: Unsupervised Gaze Estimation and Dataset for VR: Gil Shapira,

Ishay Goldin,

Evgeny Artyomov,

Donghoon Kim,

Yosi Keller,

Niv Zehngut; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shapira_2026_CVPR, author = {Shapira, Gil and Goldin, Ishay and Artyomov, Evgeny and Kim, Donghoon and Keller, Yosi and Zehngut, Niv}, title = {GazeShift: Unsupervised Gaze Estimation and Dataset for VR}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24290-24299} }
A3: Towards Advertising Aesthetic Assessment: Kaiyuan Ji,

Yixuan Gao,

Lu Sun,

Yushuo Zheng,

Zijian Chen,

Jianbo Zhang,

Xiangyang Zhu,

Yuan Tian,

Zicheng Zhang,

Guangtao Zhai; [pdf] [supp]
[bibtex]
@InProceedings{Ji_2026_CVPR, author = {Ji, Kaiyuan and Gao, Yixuan and Sun, Lu and Zheng, Yushuo and Chen, Zijian and Zhang, Jianbo and Zhu, Xiangyang and Tian, Yuan and Zhang, Zicheng and Zhai, Guangtao}, title = {A3: Towards Advertising Aesthetic Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9478-9490} }
Layered 4D-Rotor Gaussian Splatting: A Compressed Representation for Long Dynamic Scenes: Hanjie Xu,

Yuanxing Duan,

Qiyu Dai,

Ge Li,

Baoquan Chen,

He Wang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Hanjie and Duan, Yuanxing and Dai, Qiyu and Li, Ge and Chen, Baoquan and Wang, He}, title = {Layered 4D-Rotor Gaussian Splatting: A Compressed Representation for Long Dynamic Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18958-18967} }
Just-in-Time: Training-Free Spatial Acceleration for Diffusion Transformers: Wenhao Sun,

Ji Li,

Zhaoqiang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR, author = {Sun, Wenhao and Li, Ji and Liu, Zhaoqiang}, title = {Just-in-Time: Training-Free Spatial Acceleration for Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40643-40652} }
SigLino: Efficient Multi-Teacher Distillation for Agglomerative Vision Foundation Models: Sofian Chaybouti,

Sanath Narayan,

Yasser Dahou,

Phúc H. Lê Khắc,

Ankit Singh,

Ngoc Huynh,

Wamiq Reyaz Para,

Hilde Kuehne,

Hakim Hacid; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chaybouti_2026_CVPR, author = {Chaybouti, Sofian and Narayan, Sanath and Dahou, Yasser and L{\^e} Khắc, Ph{\'u}c H. and Singh, Ankit and Huynh, Ngoc and Para, Wamiq Reyaz and Kuehne, Hilde and Hacid, Hakim}, title = {SigLino: Efficient Multi-Teacher Distillation for Agglomerative Vision Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10071-10081} }
First Logit Boosting: Visual Grounding Method to Mitigate Object Hallucination in Large Vision-Language Models: Jiwoo Ha,

Jongwoo Baek,

Jinhyun So; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ha_2026_CVPR, author = {Ha, Jiwoo and Baek, Jongwoo and So, Jinhyun}, title = {First Logit Boosting: Visual Grounding Method to Mitigate Object Hallucination in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18241-18250} }
Gravitation-Driven Semantic Alignment for Text Video Retrieval: Yi Yang,

Zheng Wang,

Xing Xu,

Jingkuan Song,

Heng Tao Shen; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Yi and Wang, Zheng and Xu, Xing and Song, Jingkuan and Shen, Heng Tao}, title = {Gravitation-Driven Semantic Alignment for Text Video Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14946-14956} }
Beyond Pixel Simulation: Pathology Image Generation via Diagnostic Semantic Tokens and Prototype Control: Minghao Han,

Yichen Liu,

Yizhou Liu,

Zizhi Chen,

Jingqun Tang,

Xuecheng Wu,

Dingkang Yang,

Lihua Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR, author = {Han, Minghao and Liu, Yichen and Liu, Yizhou and Chen, Zizhi and Tang, Jingqun and Wu, Xuecheng and Yang, Dingkang and Zhang, Lihua}, title = {Beyond Pixel Simulation: Pathology Image Generation via Diagnostic Semantic Tokens and Prototype Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42744-42754} }
OmniGround: A Comprehensive Spatio-Temporal Grounding Benchmark for Real-World Complex Scenarios: Hong Gao,

Jingyu Wu,

Xiangkai Xu,

Kangni Xie,

Yunchen Zhang,

Bin Zhong,

Xurui Gao,

Min-Ling Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR, author = {Gao, Hong and Wu, Jingyu and Xu, Xiangkai and Xie, Kangni and Zhang, Yunchen and Zhong, Bin and Gao, Xurui and Zhang, Min-Ling}, title = {OmniGround: A Comprehensive Spatio-Temporal Grounding Benchmark for Real-World Complex Scenarios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17588-17597} }
GeoMMBench and GeoMMAgent: Toward Expert-Level Multimodal Intelligence in Geoscience and Remote Sensing: Aoran Xiao,

Shihao Cheng,

Yonghao Xu,

Yexian Ren,

Hongruixuan Chen,

Naoto Yokoya; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR, author = {Xiao, Aoran and Cheng, Shihao and Xu, Yonghao and Ren, Yexian and Chen, Hongruixuan and Yokoya, Naoto}, title = {GeoMMBench and GeoMMAgent: Toward Expert-Level Multimodal Intelligence in Geoscience and Remote Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34843-34853} }
FlexTraj: Image-to-Video Generation with Flexible Point Trajectory Control: Zhiyuan Zhang,

Can Wang,

Dongdong Chen,

Jing Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zhiyuan and Wang, Can and Chen, Dongdong and Liao, Jing}, title = {FlexTraj: Image-to-Video Generation with Flexible Point Trajectory Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4221-4231} }
Enhancing Accuracy of Uncertainty Estimation in Appearance-based Gaze Tracking with Probabilistic Evaluation and Calibration: Qiaojie Zheng,

Jiucai Zhang,

Amy Zhang,

Xiaoli Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR, author = {Zheng, Qiaojie and Zhang, Jiucai and Zhang, Amy and Zhang, Xiaoli}, title = {Enhancing Accuracy of Uncertainty Estimation in Appearance-based Gaze Tracking with Probabilistic Evaluation and Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13793-13801} }
Draft and Refine with Visual Experts: Sungheon Jeong,

Ryozo Masukawa,

Jihong Park,

Sanggeon Yun,

Wenjun Huang,

Hanning Chen,

Mahdi Imani,

Mohsen Imani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR, author = {Jeong, Sungheon and Masukawa, Ryozo and Park, Jihong and Yun, Sanggeon and Huang, Wenjun and Chen, Hanning and Imani, Mahdi and Imani, Mohsen}, title = {Draft and Refine with Visual Experts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18816-18826} }
Seeing is Improving: Visual Feedback for Iterative Text Layout Refinement: Junrong Guo,

Shancheng Fang,

Yadong Qu,

Hongtao Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Junrong and Fang, Shancheng and Qu, Yadong and Xie, Hongtao}, title = {Seeing is Improving: Visual Feedback for Iterative Text Layout Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25893-25903} }
Hier-COS: Making Deep Features Hierarchy-aware via Composition of Orthogonal Subspaces: Depanshu Sani,

Saket Anand; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sani_2026_CVPR, author = {Sani, Depanshu and Anand, Saket}, title = {Hier-COS: Making Deep Features Hierarchy-aware via Composition of Orthogonal Subspaces}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11263-11272} }
Prototype-Guided Concept Erasure in Diffusion Models: Yuze Cai,

Jiahao Lu,

Hongxiang Shi,

Yichao Zhou,

Hong Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR, author = {Cai, Yuze and Lu, Jiahao and Shi, Hongxiang and Zhou, Yichao and Lu, Hong}, title = {Prototype-Guided Concept Erasure in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16509-16519} }
DBMSolver: A Training-free Diffusion Bridge Sampler for High-Quality Image-to-Image Translation: Sankarshana Venugopal,

Mohammad Mostafavi,

Jonghyun Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Venugopal_2026_CVPR, author = {Venugopal, Sankarshana and Mostafavi, Mohammad and Choi, Jonghyun}, title = {DBMSolver: A Training-free Diffusion Bridge Sampler for High-Quality Image-to-Image Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36062-36071} }
UETrack: A Unified and Efficient Framework for Single Object Tracking: Ben Kang,

Jie Zhao,

Xin Chen,

Wanting Geng,

Bin Zhang,

Lu Zhang,

Dong Wang,

Huchuan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR, author = {Kang, Ben and Zhao, Jie and Chen, Xin and Geng, Wanting and Zhang, Bin and Zhang, Lu and Wang, Dong and Lu, Huchuan}, title = {UETrack: A Unified and Efficient Framework for Single Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20890-20901} }
CoLC: Communication-Efficient Collaborative Perception with LiDAR Completion: Yushan Han,

Hui Zhang,

Qiming Xia,

Yi Jin,

Yidong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR, author = {Han, Yushan and Zhang, Hui and Xia, Qiming and Jin, Yi and Li, Yidong}, title = {CoLC: Communication-Efficient Collaborative Perception with LiDAR Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2983-2992} }
Mixture of Style Experts for Diverse Image Stylization: Shihao Zhu,

Ziheng Ouyang,

Yijia Kang,

Qilong Wang,

Mi Zhou,

Bo Li,

Ming-Ming Cheng,

Qibin Hou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR, author = {Zhu, Shihao and Ouyang, Ziheng and Kang, Yijia and Wang, Qilong and Zhou, Mi and Li, Bo and Cheng, Ming-Ming and Hou, Qibin}, title = {Mixture of Style Experts for Diverse Image Stylization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30500-30510} }
Consistent Instance Field for Dynamic Scene Understanding: Junyi Wu,

Van Nguyen Nguyen,

Benjamin Planche,

Jiachen Tao,

Changchang Sun,

Zhongpai Gao,

Zhenghao Zhao,

Anwesa Choudhuri,

Gengyu Zhang,

Meng Zheng,

Feiran Wang,

Terrence Chen,

Yan Yan,

Ziyan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Junyi and Nguyen, Van Nguyen and Planche, Benjamin and Tao, Jiachen and Sun, Changchang and Gao, Zhongpai and Zhao, Zhenghao and Choudhuri, Anwesa and Zhang, Gengyu and Zheng, Meng and Wang, Feiran and Chen, Terrence and Yan, Yan and Wu, Ziyan}, title = {Consistent Instance Field for Dynamic Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3783-3793} }
Multinex: Lightweight Low-light Image Enhancement via Multi-prior Retinex: Alexandru Brateanu,

Tingting Mu,

Codruta O. Ancuti,

Cosmin Ancuti; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Brateanu_2026_CVPR, author = {Brateanu, Alexandru and Mu, Tingting and Ancuti, Codruta O. and Ancuti, Cosmin}, title = {Multinex: Lightweight Low-light Image Enhancement via Multi-prior Retinex}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29887-29896} }
More Natural, More Real: Object-aware Gaussian Splatting for 3D Visual Decoding from Human Brain: Haodong Jing,

Dongyao Jiang,

Jixin Wang,

Junhao Jia,

Yanshu Li,

Yongqiang Ma,

Nanning Zheng; [pdf]
[bibtex]
@InProceedings{Jing_2026_CVPR, author = {Jing, Haodong and Jiang, Dongyao and Wang, Jixin and Jia, Junhao and Li, Yanshu and Ma, Yongqiang and Zheng, Nanning}, title = {More Natural, More Real: Object-aware Gaussian Splatting for 3D Visual Decoding from Human Brain}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19033-19044} }
FoleyDesigner: Immersive Stereo Foley Generation with Precise Spatio-Temporal Alignment for Film Clips: Mengtian Li,

Kunyan Dai,

Yi Ding,

Ruobing Ni,

Ying Zhang,

Wenwu Wang,

Zhifeng Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Mengtian and Dai, Kunyan and Ding, Yi and Ni, Ruobing and Zhang, Ying and Wang, Wenwu and Xie, Zhifeng}, title = {FoleyDesigner: Immersive Stereo Foley Generation with Precise Spatio-Temporal Alignment for Film Clips}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4291-4300} }
HSI-GPT2: A Dual-Granularity Large Motion Reasoning Model with Diffusion Refinement for Human-Scene Interaction: Yuan Wang,

Xiang Li,

Yali Li,

Xuege Hou,

Shengjin Wang; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Yuan and Li, Xiang and Li, Yali and Hou, Xuege and Wang, Shengjin}, title = {HSI-GPT2: A Dual-Granularity Large Motion Reasoning Model with Diffusion Refinement for Human-Scene Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16432-16442} }
Streamlined Knowledge Distillation: Hyeon-Jin Jeong,

Han-Jin Lee,

Seok-Hwan Choi; [pdf]
[bibtex]
@InProceedings{Jeong_2026_CVPR, author = {Jeong, Hyeon-Jin and Lee, Han-Jin and Choi, Seok-Hwan}, title = {Streamlined Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26646-26655} }
Closed-Form Concept Erasure via Double Projections: Chi Zhang,

Jingpu Cheng,

Zhixian Wang,

Ping Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chi and Cheng, Jingpu and Wang, Zhixian and Liu, Ping}, title = {Closed-Form Concept Erasure via Double Projections}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24503-24513} }
Intra-class Distribution-guided Generative Hashing with Neighbor Refinement for Cross-modal Retrieval: Hao Sun,

Yadong Huo,

Qibing Qin,

Wenfeng Zhang,

Lei Huang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR, author = {Sun, Hao and Huo, Yadong and Qin, Qibing and Zhang, Wenfeng and Huang, Lei}, title = {Intra-class Distribution-guided Generative Hashing with Neighbor Refinement for Cross-modal Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2671-2681} }
Prototype-based Causal Intervention for Multi-Label Image Classification: Yanmin Li,

Zhilong Mao,

Mao Wang,

Lihua Liu,

Jibing Wu,

Weidong Bao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Yanmin and Mao, Zhilong and Wang, Mao and Liu, Lihua and Wu, Jibing and Bao, Weidong}, title = {Prototype-based Causal Intervention for Multi-Label Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24738-24747} }
BriMA: Bridged Modality Adaptation for Multi-Modal Continual Action Quality Assessment: Kanglei Zhou,

Chang Li,

Qingyi Pan,

Liyuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR, author = {Zhou, Kanglei and Li, Chang and Pan, Qingyi and Wang, Liyuan}, title = {BriMA: Bridged Modality Adaptation for Multi-Modal Continual Action Quality Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38904-38914} }
Decoupling Bias, Aligning Distributions: Synergistic Fairness Optimization for Deepfake Detection: Feng Ding,

Wenhui Yi,

Yunpeng Zhou,

Xinan He,

Hong Rao,

Shu Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2026_CVPR, author = {Ding, Feng and Yi, Wenhui and Zhou, Yunpeng and He, Xinan and Rao, Hong and Hu, Shu}, title = {Decoupling Bias, Aligning Distributions: Synergistic Fairness Optimization for Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32704-32713} }
Real2Sim2Real: RetinalDepth-64K for Depth Estimation in Posterior Segment Ophthalmic Surgery: Bingwen Dong,

Gan Liu,

Xiaoxi Lu,

Guangcheng Chen,

Jialu Zhang,

Yan Hu,

Xiaoqing Zhang,

Jiang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2026_CVPR, author = {Dong, Bingwen and Liu, Gan and Lu, Xiaoxi and Chen, Guangcheng and Zhang, Jialu and Hu, Yan and Zhang, Xiaoqing and Liu, Jiang}, title = {Real2Sim2Real: RetinalDepth-64K for Depth Estimation in Posterior Segment Ophthalmic Surgery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26899-26908} }
BioVITA: Biological Dataset, Model, and Benchmark for Visual-Textual-Acoustic Alignment: Risa Shinoda,

Kaede Shiohara,

Nakamasa Inoue,

Kuniaki Saito,

Hiroaki Santo,

Fumio Okura; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shinoda_2026_CVPR, author = {Shinoda, Risa and Shiohara, Kaede and Inoue, Nakamasa and Saito, Kuniaki and Santo, Hiroaki and Okura, Fumio}, title = {BioVITA: Biological Dataset, Model, and Benchmark for Visual-Textual-Acoustic Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29336-29346} }
Anti-I2V: Safeguarding your Photos from Malicious Image-to-video Generation: Duc Vu,

Anh Nguyen,

Chi Tran,

Anh Tran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vu_2026_CVPR, author = {Vu, Duc and Nguyen, Anh and Tran, Chi and Tran, Anh}, title = {Anti-I2V: Safeguarding your Photos from Malicious Image-to-video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37621-37631} }
HFedATM: Hierarchical Federated Domain Generalization via Optimal Transport and Regularized Mean Aggregation: Thinh Nguyen,

Trung Phan,

Binh Nguyen,

Khoa D Doan,

Kok-Seng Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Thinh and Phan, Trung and Nguyen, Binh and Doan, Khoa D and Wong, Kok-Seng}, title = {HFedATM: Hierarchical Federated Domain Generalization via Optimal Transport and Regularized Mean Aggregation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39435-39444} }
Scaling the Long Video Understanding of Multimodal Large Language Models via Visual Memory Mechanism: Tao Chen,

Kun Zhang,

Qiong Wu,

Xiao Chen,

Chao Chang,

Xiaoshuai Sun,

Yiyi Zhou,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Tao and Zhang, Kun and Wu, Qiong and Chen, Xiao and Chang, Chao and Sun, Xiaoshuai and Zhou, Yiyi and Ji, Rongrong}, title = {Scaling the Long Video Understanding of Multimodal Large Language Models via Visual Memory Mechanism}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31877-31888} }
Learning 3D Shape Fidelity Metric from Real-world Distortions: Xuelu Feng,

Tianyu Luan,

Zixin Zhu,

Akshobhya Sharma,

Phani Nuney,

Junsong Yuan,

Chunming Qiao; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR, author = {Feng, Xuelu and Luan, Tianyu and Zhu, Zixin and Sharma, Akshobhya and Nuney, Phani and Yuan, Junsong and Qiao, Chunming}, title = {Learning 3D Shape Fidelity Metric from Real-world Distortions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28391-28401} }
UDAPose: Unsupervised Domain Adaptation for Low-Light Human Pose Estimation: Haopeng Chen,

Yihao Ai,

Kabeen Kim,

Robby T. Tan,

Yixin Chen,

Bo Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Haopeng and Ai, Yihao and Kim, Kabeen and Tan, Robby T. and Chen, Yixin and Wang, Bo}, title = {UDAPose: Unsupervised Domain Adaptation for Low-Light Human Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13781-13792} }
Hyperbolic Prototype Learning with Uncertainty-Aware Consistency for Continual Test-Time Segmentation: Siddhant Gole,

Akash Pal,

Amit More,

S Divakar Bhat,

Subhasis Chaudhuri,

Biplab Banerjee; [pdf] [supp]
[bibtex]
@InProceedings{Gole_2026_CVPR, author = {Gole, Siddhant and Pal, Akash and More, Amit and Bhat, S Divakar and Chaudhuri, Subhasis and Banerjee, Biplab}, title = {Hyperbolic Prototype Learning with Uncertainty-Aware Consistency for Continual Test-Time Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34694-34703} }
Learning to Assist: Physics-Grounded Human-Human Control via Multi-Agent Reinforcement Learning: Yuto Shibata,

Kashu Yamazaki,

Lalit Jayanti,

Yoshimitsu Aoki,

Mariko Isogawa,

Katerina Fragkiadaki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shibata_2026_CVPR, author = {Shibata, Yuto and Yamazaki, Kashu and Jayanti, Lalit and Aoki, Yoshimitsu and Isogawa, Mariko and Fragkiadaki, Katerina}, title = {Learning to Assist: Physics-Grounded Human-Human Control via Multi-Agent Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38345-38354} }
WHU-MARS: A Multispectral Aerial-Ground Benchmark Towards Any-Scenario Person Re-Identification: Yuxuan Zhao,

Zhongao Zhou,

Bin Yang,

He Li,

Jian Liang,

Jun Chen,

Bo Du,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yuxuan and Zhou, Zhongao and Yang, Bin and Li, He and Liang, Jian and Chen, Jun and Du, Bo and Ye, Mang}, title = {WHU-MARS: A Multispectral Aerial-Ground Benchmark Towards Any-Scenario Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25461-25471} }
SAGE: Scalable Agentic 3D Scene Generation for Embodied AI: Hongchi Xia,

Xuan Li,

Zhaoshuo Li,

Qianli Ma,

Jiashu Xu,

Ming-Yu Liu,

Yin Cui,

Tsung-Yi Lin,

Wei-Chiu Ma,

Shenlong Wang,

Shuran Song,

Fangyin Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR, author = {Xia, Hongchi and Li, Xuan and Li, Zhaoshuo and Ma, Qianli and Xu, Jiashu and Liu, Ming-Yu and Cui, Yin and Lin, Tsung-Yi and Ma, Wei-Chiu and Wang, Shenlong and Song, Shuran and Wei, Fangyin}, title = {SAGE: Scalable Agentic 3D Scene Generation for Embodied AI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22358-22368} }
GVIS: Generative Vector Image Steganography: Zihao Xu,

Dawei Xu,

Zihan Li,

Xixi Zheng,

Chuan Zhang; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Zihao and Xu, Dawei and Li, Zihan and Zheng, Xixi and Zhang, Chuan}, title = {GVIS: Generative Vector Image Steganography}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9384-9393} }
Photo-Guided Tooth Segmentation on 3D Oral Scan Model: Shaojie Zhuang,

Guangshun Wei,

Jiangxin He,

Yuanfeng Zhou; [pdf]
[bibtex]
@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Shaojie and Wei, Guangshun and He, Jiangxin and Zhou, Yuanfeng}, title = {Photo-Guided Tooth Segmentation on 3D Oral Scan Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37558-37567} }
OMGTex: One-stage Multi-style Facial Texture Reconstruction without Geometry Guidance: Zitong Xiao,

Yuda Qiu,

Zisheng Ye,

Xiaoguang Han; [pdf]
[bibtex]
@InProceedings{Xiao_2026_CVPR, author = {Xiao, Zitong and Qiu, Yuda and Ye, Zisheng and Han, Xiaoguang}, title = {OMGTex: One-stage Multi-style Facial Texture Reconstruction without Geometry Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21242-21251} }
Multi-Scale Gaussian-Language Map for Zero-shot Embodied Navigation and Reasoning: Sixian Zhang,

Yiyao Wang,

Xinhang Song,

Keming Zhang,

Zijian Xu,

Shuqiang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Sixian and Wang, Yiyao and Song, Xinhang and Zhang, Keming and Xu, Zijian and Jiang, Shuqiang}, title = {Multi-Scale Gaussian-Language Map for Zero-shot Embodied Navigation and Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37086-37097} }
Can You Learn to See Without Images? Procedural Warm-Up for Vision Transformers: Zachary Shinnick,

Liangze Jiang,

Hemanth Saratchandran,

Damien Teney,

Anton van den Hengel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shinnick_2026_CVPR, author = {Shinnick, Zachary and Jiang, Liangze and Saratchandran, Hemanth and Teney, Damien and van den Hengel, Anton}, title = {Can You Learn to See Without Images? Procedural Warm-Up for Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27439-27448} }
LagerNVS: Latent Geometry for Fully Neural Real-time Novel View Synthesis: Stanislaw Szymanowicz,

Minghao Chen,

Jianyuan Wang,

Christian Rupprecht,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Szymanowicz_2026_CVPR, author = {Szymanowicz, Stanislaw and Chen, Minghao and Wang, Jianyuan and Rupprecht, Christian and Vedaldi, Andrea}, title = {LagerNVS: Latent Geometry for Fully Neural Real-time Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15443-15453} }
Structure-Aware Representation Distillation for Tiny-Dense Object Segmentation: Xuesong Liu,

Anke Xu,

Wenbo Cao,

Emmett Ientilucci; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Xuesong and Xu, Anke and Cao, Wenbo and Ientilucci, Emmett}, title = {Structure-Aware Representation Distillation for Tiny-Dense Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34775-34783} }
Linear Image Generation by Synthesizing Exposure Brackets: Yuekun Dai,

Zhoutong Zhang,

Shangchen Zhou,

Nanxuan Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2026_CVPR, author = {Dai, Yuekun and Zhang, Zhoutong and Zhou, Shangchen and Zhao, Nanxuan}, title = {Linear Image Generation by Synthesizing Exposure Brackets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16206-16215} }
Quota-Calibrated Fine-Grained Alignment with Context-Aware Marginals for Text-based Person Retrieval: Dongsheng Li,

Xinyuan Guo,

Huijie Zhang,

Pingting Hao,

Qiushi Xia; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Dongsheng and Guo, Xinyuan and Zhang, Huijie and Hao, Pingting and Xia, Qiushi},
  title     = {Quota-Calibrated Fine-Grained Alignment with Context-Aware Marginals for Text-based Person Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31103--31112},
}
SpaceTools: Tool-Augmented Spatial Reasoning via Double Interactive RL: Siyi Chen,

Mikaela Angelina Uy,

Chan Hee Song,

Faisal Ladhak,

Adithyavairavan Murali,

Qing Qu,

Stan Birchfield,

Valts Blukis,

Jonathan Tremblay; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Siyi and Uy, Mikaela Angelina and Song, Chan Hee and Ladhak, Faisal and Murali, Adithyavairavan and Qu, Qing and Birchfield, Stan and Blukis, Valts and Tremblay, Jonathan},
  title     = {{SpaceTools}: Tool-Augmented Spatial Reasoning via Double Interactive {RL}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37109--37120},
}
VLM-Pruner: Buffering for Spatial Sparsity in an Efficient VLM Centrifugal Token Pruning Paradigm: Zhenkai Wu,

Xiaowen Ma,

Zhenliang Ni,

Dengming Zhang,

Han Shu,

Xin Jiang,

Xinghao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zhenkai and Ma, Xiaowen and Ni, Zhenliang and Zhang, Dengming and Shu, Han and Jiang, Xin and Chen, Xinghao},
  title     = {{VLM-Pruner}: Buffering for Spatial Sparsity in an Efficient {VLM} Centrifugal Token Pruning Paradigm},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31952--31961},
}
Adapting Point Cloud Analysis via Multimodal Bayesian Distribution Learning: Xingyu Zhu,

Liang Yi,

Shuo Wang,

Wenbo Zhu,

Yongliang Wu,

Beier Zhu,

Hanwang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Xingyu and Yi, Liang and Wang, Shuo and Zhu, Wenbo and Wu, Yongliang and Zhu, Beier and Zhang, Hanwang},
  title     = {Adapting Point Cloud Analysis via Multimodal {Bayesian} Distribution Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9976--9985},
}
Part$^{2}$GS: Part-aware Modeling of Articulated Objects using 3D Gaussian Splatting: Tianjiao Yu,

Vedant Shah,

Muntasir Wahed,

Ying Shen,

Kiet A. Nguyen,

Ismini Lourentzou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Tianjiao and Shah, Vedant and Wahed, Muntasir and Shen, Ying and Nguyen, Kiet A. and Lourentzou, Ismini},
  title     = {{Part$^{2}$GS}: Part-aware Modeling of Articulated Objects using {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18913--18923},
}
Anchor-Guided Gradient Alignment for Incomplete Multimodal Learning: Zhi-Hao Guan,

Longfei Huang,

Yang Yang; [pdf] [supp]
[bibtex]
@InProceedings{Guan_2026_CVPR,
  author    = {Guan, Zhi-Hao and Huang, Longfei and Yang, Yang},
  title     = {Anchor-Guided Gradient Alignment for Incomplete Multimodal Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37896--37905},
}
Stake the Points: Structure-Faithful Instance Unlearning: Kiseong Hong,

JungKyoo Shin,

Eunwoo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Kiseong and Shin, JungKyoo and Kim, Eunwoo},
  title     = {Stake the Points: Structure-Faithful Instance Unlearning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24524--24533},
}
MOSAIC-GS: Monocular Scene Reconstruction via Advanced Initialization for Complex Dynamic Environments: Svitlana Morkva,

Vaishakh Patil,

Alessio Tonioni,

Michael Oechsle,

Maximum Wilder-Smith,

Marco Hutter; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Morkva_2026_CVPR,
  author    = {Morkva, Svitlana and Patil, Vaishakh and Tonioni, Alessio and Oechsle, Michael and Wilder-Smith, Maximum and Hutter, Marco},
  title     = {{MOSAIC-GS}: Monocular Scene Reconstruction via Advanced Initialization for Complex Dynamic Environments},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1167--1176},
}
Physically Inspired Gaussian Splatting for HDR Novel View Synthesis: Huimin Zeng,

Yue Bai,

Hailing Wang,

Yun Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Huimin and Bai, Yue and Wang, Hailing and Fu, Yun},
  title     = {Physically Inspired {Gaussian} Splatting for {HDR} Novel View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11808--11817},
}
Understanding and Enforcing Weight Disentanglement in Task Arithmetic: Shangge Liu,

Yuehan Yin,

Lei Wang,

Qi Fan,

Yinghuan Shi,

Wenbin Li,

Yang Gao,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Shangge and Yin, Yuehan and Wang, Lei and Fan, Qi and Shi, Yinghuan and Li, Wenbin and Gao, Yang and Tao, Dacheng},
  title     = {Understanding and Enforcing Weight Disentanglement in Task Arithmetic},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28744--28753},
}
Back to Source: Open-Set Continual Test-Time Adaptation via Domain Compensation: Yingkai Yang,

Chaoqi Chen,

Hui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yingkai and Chen, Chaoqi and Huang, Hui},
  title     = {Back to Source: Open-Set Continual Test-Time Adaptation via Domain Compensation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7957--7966},
}
HAD: Hallucination-Aware Diffusion Priors for 3D Reconstruction: Xi Liu,

Weiwei Sun,

Zhou Ren,

Chris Broaddus,

Siyu Huang,

Laurent Guigues; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xi and Sun, Weiwei and Ren, Zhou and Broaddus, Chris and Huang, Siyu and Guigues, Laurent},
  title     = {{HAD}: Hallucination-Aware Diffusion Priors for {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29781--29791},
}
Phantom: Physical Object Interactions as Dynamic Triggers for NMS-Exploited Backdoors: Tianlin Huo,

Dongchuan Ran,

Ranjie Duan,

Yao Zhu,

Peilun Du,

Ningbo Yao,

Huanqian Yan,

Xu Han,

Qiang Yun,

Yuzheng Tan,

Yang Bao,

Yuan He; [pdf] [supp]
[bibtex]
@InProceedings{Huo_2026_CVPR,
  author    = {Huo, Tianlin and Ran, Dongchuan and Duan, Ranjie and Zhu, Yao and Du, Peilun and Yao, Ningbo and Yan, Huanqian and Han, Xu and Yun, Qiang and Tan, Yuzheng and Bao, Yang and He, Yuan},
  title     = {Phantom: Physical Object Interactions as Dynamic Triggers for {NMS}-Exploited Backdoors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27906--27915},
}
Evolutionary Multimodal Reasoning via Hierarchical Semantic Representation for Intent Recognition: Qianrui Zhou,

Hua Xu,

Yunjin Gu,

Yifan Wang,

Songze Li,

Hanlei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Qianrui and Xu, Hua and Gu, Yunjin and Wang, Yifan and Li, Songze and Zhang, Hanlei},
  title     = {Evolutionary Multimodal Reasoning via Hierarchical Semantic Representation for Intent Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14979--14989},
}
Degradation-Consistent Test-Time Adaptation for All-in-One Image Restoration: Ni Tang,

Shenghao Nie,

Xiaotong Luo,

Yuan Xie,

Yanyun Qu; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Ni and Nie, Shenghao and Luo, Xiaotong and Xie, Yuan and Qu, Yanyun},
  title     = {Degradation-Consistent Test-Time Adaptation for All-in-One Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15476--15485},
}
ManifoldNeuS: Manifold-aware View Optimizability for Pose-Free Neural Surface Reconstruction: Xinxin Liu,

Xue Wang,

Guoqing Zhou,

Qing Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xinxin and Wang, Xue and Zhou, Guoqing and Wang, Qing},
  title     = {{ManifoldNeuS}: Manifold-aware View Optimizability for Pose-Free Neural Surface Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {261--271},
}
Towards Stealthy and Effective Backdoor Attacks on Lane Detection: A Naturalistic Data Poisoning Approach: Yifan Liao,

Yuxin Cao,

Yedi Zhang,

Wentao He,

Yan Xiao,

Xianglong Du,

Zhiyong Huang,

Jin Song Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Yifan and Cao, Yuxin and Zhang, Yedi and He, Wentao and Xiao, Yan and Du, Xianglong and Huang, Zhiyong and Dong, Jin Song},
  title     = {Towards Stealthy and Effective Backdoor Attacks on Lane Detection: A Naturalistic Data Poisoning Approach},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34950--34960},
}
One-to-More: High-Fidelity Training-Free Anomaly Generation with Attention Control: Haoxiang Rao,

Zhao Wang,

Chenyang Si,

Yan Lyu,

Yuanyi Duan,

Fang Zhao,

Caifeng Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rao_2026_CVPR,
  author    = {Rao, Haoxiang and Wang, Zhao and Si, Chenyang and Lyu, Yan and Duan, Yuanyi and Zhao, Fang and Shan, Caifeng},
  title     = {One-to-More: High-Fidelity Training-Free Anomaly Generation with Attention Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28492--28501},
}
MARIS: Marine Open-Vocabulary Instance Segmentation: Bingyu Li,

Feiyu Wang,

Da Zhang,

Zhiyuan Zhao,

Junyu Gao,

Xuelong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Bingyu and Wang, Feiyu and Zhang, Da and Zhao, Zhiyuan and Gao, Junyu and Li, Xuelong},
  title     = {{MARIS}: Marine Open-Vocabulary Instance Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24939--24949},
}
Small Object, Great Challenge: A Benchmark for Small Object Visual Grounding: Wenqi Jia,

Ruifan Li,

Pengyue Lin,

Fangxiang Feng,

Zhanyu Ma,

Xiaojie Wang; [pdf]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Wenqi and Li, Ruifan and Lin, Pengyue and Feng, Fangxiang and Ma, Zhanyu and Wang, Xiaojie},
  title     = {Small Object, Great Challenge: A Benchmark for Small Object Visual Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31822--31832},
}
Align While Search: Belief-Guided Exploratory Inference for World-Grounded Embodied Agents: Seohui Bae,

Jeonghye Kim,

Youngchul Sung,

Woohyung Lim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bae_2026_CVPR,
  author    = {Bae, Seohui and Kim, Jeonghye and Sung, Youngchul and Lim, Woohyung},
  title     = {Align While Search: Belief-Guided Exploratory Inference for World-Grounded Embodied Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29642--29651},
}
Unlearning without Forgetting: Securely Removing Targeted Concepts from Large-Scale Vision-Language Open-Vocabulary Detectors: Zhongze Wu,

Xiu Su,

Feng Yang,

Dan Niu,

Shan You,

Yueyi Luo,

Jun Long; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zhongze and Su, Xiu and Yang, Feng and Niu, Dan and You, Shan and Luo, Yueyi and Long, Jun},
  title     = {Unlearning without Forgetting: Securely Removing Targeted Concepts from Large-Scale Vision-Language Open-Vocabulary Detectors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6271--6281},
}
rPPG-VQA: A Video Quality Assessment Framework for Unsupervised rPPG Training: Tianyang Dai,

Ming Chang,

Yan Chen,

Yang Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Tianyang and Chang, Ming and Chen, Yan and Hu, Yang},
  title     = {{rPPG-VQA}: A Video Quality Assessment Framework for Unsupervised {rPPG} Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1365--1375},
}
PE3R: Perception-Efficient 3D Reconstruction: Jie Hu,

Shizun Wang,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Jie and Wang, Shizun and Wang, Xinchao},
  title     = {{PE3R}: Perception-Efficient {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26877--26887},
}
VKG-QA: Visual Knowledge Graph-based Question Answer for Large Multimodal Models: Yuntao Du,

Yiming Wang,

Renshuo Yuan,

Jincheng Yue,

Yijing Chen,

Yue Fan,

Bo Zhang,

Qian Li,

Lizhen Cui; [pdf] [supp]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Yuntao and Wang, Yiming and Yuan, Renshuo and Yue, Jincheng and Chen, Yijing and Fan, Yue and Zhang, Bo and Li, Qian and Cui, Lizhen},
  title     = {{VKG-QA}: Visual Knowledge Graph-based Question Answer for Large Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41213--41223},
}
AdaCluster: Adaptive Query-Key Clustering for Sparse Attention in Video Generation: Haoyue Tan,

Shengnan Wang,

Yulin Qiao,

Juncheng Zhang,

Youhui Bai,

Ping Gong,

Zewen Jin,

Cheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Haoyue and Wang, Shengnan and Qiao, Yulin and Zhang, Juncheng and Bai, Youhui and Gong, Ping and Jin, Zewen and Li, Cheng},
  title     = {{AdaCluster}: Adaptive Query-Key Clustering for Sparse Attention in Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43249--43259},
}
InstantViR: Real-Time Video Inverse Problem Solver with Distilled Diffusion Prior: Weimin Bai,

Suzhe Xu,

Yiwei Ren,

Jinhua Hao,

Ming Sun,

Wenzheng Chen,

He Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Weimin and Xu, Suzhe and Ren, Yiwei and Hao, Jinhua and Sun, Ming and Chen, Wenzheng and Sun, He},
  title     = {{InstantViR}: Real-Time Video Inverse Problem Solver with Distilled Diffusion Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16583--16592},
}
HyperGait: Unleashing the Power of Parsing for Gait Recognition in the Wild via Hypergraph: Jinkai Zheng,

Jiaqing Wei,

Xinxiang Jin,

Yaoqi Sun,

Xichun Sheng,

Ming Li,

Liangqiong Qu,

Xinchen Liu,

Wu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Jinkai and Wei, Jiaqing and Jin, Xinxiang and Sun, Yaoqi and Sheng, Xichun and Li, Ming and Qu, Liangqiong and Liu, Xinchen and Liu, Wu},
  title     = {{HyperGait}: Unleashing the Power of Parsing for Gait Recognition in the Wild via Hypergraph},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18513--18522},
}
TWINGS: Thin Plate Splines Warp-aligned Initialization for Sparse-View Gaussian Splatting: Hyeseong Kim,

Geonhui Son,

Deukhee Lee,

Dosik Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Hyeseong and Son, Geonhui and Lee, Deukhee and Hwang, Dosik},
  title     = {{TWINGS}: Thin Plate Splines Warp-aligned Initialization for Sparse-View {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26062--26071},
}
CUE: Concept-Aware Multi-Label Expansion to Mitigate Concept Confusion in Long-Tailed Learning: Ruichi Zhang,

Chikai Shang,

Jiacheng Yang,

Mengke Li,

Yang Zhou,

Junlong Gao,

Yang Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Ruichi and Shang, Chikai and Yang, Jiacheng and Li, Mengke and Zhou, Yang and Gao, Junlong and Lu, Yang},
  title     = {{CUE}: Concept-Aware Multi-Label Expansion to Mitigate Concept Confusion in Long-Tailed Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15030--15039},
}
LOREAL: Mitigating Low-Resolution Challenges in Vision-Language Models with Attribute-driven Prompt Self-Distillation: Xucong Wang,

Pengkun Wang,

Zhe Zhao,

Liheng Yu,

Rui Mao,

Yang Wang; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xucong and Wang, Pengkun and Zhao, Zhe and Yu, Liheng and Mao, Rui and Wang, Yang},
  title     = {{LOREAL}: Mitigating Low-Resolution Challenges in Vision-Language Models with Attribute-driven Prompt Self-Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39152--39163},
}
VOSR: A Vision-Only Generative Model for Image Super-Resolution: Rongyuan Wu,

Lingchen Sun,

Zhengqiang Zhang,

Xiangtao Kong,

Jixin Zhao,

Shihao Wang,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Rongyuan and Sun, Lingchen and Zhang, Zhengqiang and Kong, Xiangtao and Zhao, Jixin and Wang, Shihao and Zhang, Lei},
  title     = {{VOSR}: A Vision-Only Generative Model for Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16311--16321},
}
Cov2Pose: Leveraging Spatial Covariance for Direct Manifold-aware 6-DoF Object Pose Estimation: Nassim Ali Ousalah,

Peyman Rostami,

Vincent Gaudillière,

Emmanuel Koumandakis,

Anis Kacem,

Enjie Ghorbel,

Djamila Aouada; [pdf] [supp]
[bibtex]
@InProceedings{Ousalah_2026_CVPR,
  author    = {Ousalah, Nassim Ali and Rostami, Peyman and Gaudilli{\`e}re, Vincent and Koumandakis, Emmanuel and Kacem, Anis and Ghorbel, Enjie and Aouada, Djamila},
  title     = {{Cov2Pose}: Leveraging Spatial Covariance for Direct Manifold-aware {6-DoF} Object Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40727--40738},
}
DriveLaW: Unifying Planning and Video Generation in a Latent Driving World: Tianze Xia,

Yongkang Li,

Lijun Zhou,

Jingfeng Yao,

Kaixin Xiong,

Haiyang Sun,

Bing Wang,

Kun Ma,

Guang Chen,

Hangjun Ye,

Wenyu Liu,

Xinggang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Tianze and Li, Yongkang and Zhou, Lijun and Yao, Jingfeng and Xiong, Kaixin and Sun, Haiyang and Wang, Bing and Ma, Kun and Chen, Guang and Ye, Hangjun and Liu, Wenyu and Wang, Xinggang},
  title     = {{DriveLaW}: Unifying Planning and Video Generation in a Latent Driving World},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39701--39712},
}
Mixture of Prototypes for Test-time Adaptive Segmentation: Guangrui Li,

Zhengyu Zhu,

Yongxin Ge; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Guangrui and Zhu, Zhengyu and Ge, Yongxin},
  title     = {Mixture of Prototypes for Test-time Adaptive Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24990--25000},
}
OrthoFuse: Training-free Riemannian Fusion of Orthogonal Style-Concept Adapters for Diffusion Models: Ali Aliev,

Kamil Garifullin,

Nikolay Yudin,

Vera Soboleva,

Alexander Molozhavenko,

Ivan Oseledets,

Aibek Alanov,

Maxim Rakhuba; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Aliev_2026_CVPR,
  author    = {Aliev, Ali and Garifullin, Kamil and Yudin, Nikolay and Soboleva, Vera and Molozhavenko, Alexander and Oseledets, Ivan and Alanov, Aibek and Rakhuba, Maxim},
  title     = {{OrthoFuse}: Training-free {Riemannian} Fusion of Orthogonal Style-Concept Adapters for Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36009--36018},
}
MatMart: Material Reconstruction of 3D Objects via Diffusion: Xiuchao Wu,

Pengfei Zhu,

Jiangjing Lyu,

Xinguo Liu,

Jie Guo,

Yanwen Guo,

Weiwei Xu,

Chengfei Lyu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xiuchao and Zhu, Pengfei and Lyu, Jiangjing and Liu, Xinguo and Guo, Jie and Guo, Yanwen and Xu, Weiwei and Lyu, Chengfei},
  title     = {{MatMart}: Material Reconstruction of {3D} Objects via Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2336--2345},
}
No Labels, No Look-Ahead: Unsupervised Online Video Stabilization with Classical Priors: Tao Liu,

Kan Ren,

Gang Wan,

Shibo Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Tao and Ren, Kan and Wan, Gang and Wen, Shibo},
  title     = {No Labels, No Look-Ahead: Unsupervised Online Video Stabilization with Classical Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6868--6877},
}
Generative Neural Video Compression via Video Diffusion Prior: Qi Mao,

Hao Cheng,

Tinghan Yang,

Libiao Jin,

Siwei Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Qi and Cheng, Hao and Yang, Tinghan and Jin, Libiao and Ma, Siwei},
  title     = {Generative Neural Video Compression via Video Diffusion Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43239--43248},
}
LLaMo: Scaling Pretrained Language Models for Unified Motion Understanding and Generation with Continuous Autoregressive Tokens: Zekun Li,

Sizhe An,

Chengcheng Tang,

Chuan Guo,

Ivan Shugurov,

Linguang Zhang,

Amy Zhao,

Srinath Sridhar,

Lingling Tao,

Abhay Mittal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zekun and An, Sizhe and Tang, Chengcheng and Guo, Chuan and Shugurov, Ivan and Zhang, Linguang and Zhao, Amy and Sridhar, Srinath and Tao, Lingling and Mittal, Abhay},
  title     = {{LLaMo}: Scaling Pretrained Language Models for Unified Motion Understanding and Generation with Continuous Autoregressive Tokens},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2209--2220},
}
Conditional Factuality Controlled LLMs with Generalization Certificates via Conformal Sampling: Kai Ye,

Qingtao Pan,

Shuo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Kai and Pan, Qingtao and Li, Shuo},
  title     = {Conditional Factuality Controlled {LLMs} with Generalization Certificates via Conformal Sampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3627--3635},
}
Stable and Efficient Single-Rollout RL for Multimodal Reasoning: Rui Liu,

Dian Yu,

Lei Ke,

Haolin Liu,

Yujun Zhou,

Zhenwen Liang,

Haitao Mi,

Pratap Tokekar,

Dong Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Rui and Yu, Dian and Ke, Lei and Liu, Haolin and Zhou, Yujun and Liang, Zhenwen and Mi, Haitao and Tokekar, Pratap and Yu, Dong},
  title     = {Stable and Efficient Single-Rollout {RL} for Multimodal Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12009--12018},
}
DPGF-Net: Dual-Prior Guided Fusion Network for Joint Assessment of Perceptual Quality and Semantic Consistency in AI-Generated Images: Tao Li,

Xingran Liao,

Mingliang Zhou; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Tao and Liao, Xingran and Zhou, Mingliang},
  title     = {{DPGF-Net}: Dual-Prior Guided Fusion Network for Joint Assessment of Perceptual Quality and Semantic Consistency in {AI}-Generated Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19529--19538},
}
All-in-One Slider for Attribute Manipulation in Diffusion Models: Weixin Ye,

Hongguang Zhu,

Wei Wang,

Yahui Liu,

Mengyu Wang,

Xuecheng Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Weixin and Zhu, Hongguang and Wang, Wei and Liu, Yahui and Wang, Mengyu and Nie, Xuecheng},
  title     = {All-in-One Slider for Attribute Manipulation in Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18693--18702},
}
Understanding and Mitigating Hallucinations in Multimodal Chain-of-Thought Models: Ji Ma,

Wei Suo,

Peng Wang,

Yanning Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Ji and Suo, Wei and Wang, Peng and Zhang, Yanning},
  title     = {Understanding and Mitigating Hallucinations in Multimodal Chain-of-Thought Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40224--40234},
}
BlackMirror: Black-Box Backdoor Detection for Text-to-Image Models via Instruction-Response Deviation: Feiran Li,

Qianqian Xu,

Shilong Bao,

Zhiyong Yang,

Xilin Zhao,

Xiaochun Cao,

Qingming Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Feiran and Xu, Qianqian and Bao, Shilong and Yang, Zhiyong and Zhao, Xilin and Cao, Xiaochun and Huang, Qingming},
  title     = {{BlackMirror}: Black-Box Backdoor Detection for Text-to-Image Models via Instruction-Response Deviation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30098--30109},
}
Envision, Attend, Then Respond: Counterfactual Hallucination Mitigation in Large Vision-Language Models: Yuxuan Liang,

Fan Shi,

Rui Zhu,

Xu Li,

Xiaolei Chen,

Zhe Liu,

Bin Li,

Xiangyang Xue; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Yuxuan and Shi, Fan and Zhu, Rui and Li, Xu and Chen, Xiaolei and Liu, Zhe and Li, Bin and Xue, Xiangyang},
  title     = {Envision, Attend, Then Respond: Counterfactual Hallucination Mitigation in Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18261--18272},
}
From Where Things Are to What They Are For: Benchmarking Spatial-Functional Intelligence in Multimodal LLMs: Le Zhang,

Jihan Yang,

Soundarya Krishnan,

Jimit Majmudar,

Xiou Ge,

Prasoon Puri,

Prathamesh Saraf,

Shruti Bhargava,

Dhivya Piraviperumal,

Yinan Ling,

Cindy Pan,

Hong Yu,

Aishwarya Agrawal,

Bo-Hsiang Tseng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Le and Yang, Jihan and Krishnan, Soundarya and Majmudar, Jimit and Ge, Xiou and Puri, Prasoon and Saraf, Prathamesh and Bhargava, Shruti and Piraviperumal, Dhivya and Ling, Yinan and Pan, Cindy and Yu, Hong and Agrawal, Aishwarya and Tseng, Bo-Hsiang},
  title     = {From Where Things Are to What They Are For: Benchmarking Spatial-Functional Intelligence in Multimodal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12052--12063},
}
Phrase-Grounding-Aware Supervised Fine-Tuning for Chart Recognition via Side-Masked Attention: Koichiro Ito; [pdf] [supp]
[bibtex]
@InProceedings{Ito_2026_CVPR,
  author    = {Ito, Koichiro},
  title     = {Phrase-Grounding-Aware Supervised Fine-Tuning for Chart Recognition via Side-Masked Attention},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9501--9511},
}
Intrinsic Image Fusion for Multi-View 3D Material Reconstruction: Peter Kocsis,

Lukas Höllein,

Matthias Nießner; [pdf] [supp]
[bibtex]
@InProceedings{Kocsis_2026_CVPR,
  author    = {Kocsis, Peter and H{\"o}llein, Lukas and Nie{\ss}ner, Matthias},
  title     = {Intrinsic Image Fusion for Multi-View {3D} Material Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22571--22580},
}
SparseSplat: Towards Applicable Feed-Forward 3D Gaussian Splatting with Pixel-Unaligned Prediction: Zicheng Zhang,

Xiangting Meng,

Ke Wu,

Wenchao Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zicheng and Meng, Xiangting and Wu, Ke and Ding, Wenchao},
  title     = {{SparseSplat}: Towards Applicable Feed-Forward {3D} {Gaussian} Splatting with Pixel-Unaligned Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5049--5058},
}
Taming Video Models for 3D and 4D Generation via Zero-Shot Camera Control: Chenxi Song,

Yanming Yang,

Tong Zhao,

Ruibo Li,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Chenxi and Yang, Yanming and Zhao, Tong and Li, Ruibo and Zhang, Chi},
  title     = {Taming Video Models for {3D} and {4D} Generation via Zero-Shot Camera Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40352--40363},
}
Learning Eigenstructures of Unstructured Data Manifolds: Roy Velich,

Arkadi Piven,

David Bensaïd,

Daniel Cremers,

Thomas Dagès,

Ron Kimmel; [pdf] [supp]
[bibtex]
@InProceedings{Velich_2026_CVPR,
  author    = {Velich, Roy and Piven, Arkadi and Bensa{\"\i}d, David and Cremers, Daniel and Dag{\`e}s, Thomas and Kimmel, Ron},
  title     = {Learning Eigenstructures of Unstructured Data Manifolds},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36201--36214},
}
E2EGS: Event-to-Edge Gaussian Splatting for Pose-Free 3D Reconstruction: Yunsoo Kim,

Changki Sung,

Dasol Hong,

Hyun Myung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Yunsoo and Sung, Changki and Hong, Dasol and Myung, Hyun},
  title     = {{E2EGS}: Event-to-Edge {Gaussian} Splatting for Pose-Free {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4922--4931},
}
Revisiting Optimal Coding for I-ToF under Practical Sensor Constraints: Wenbin Luo,

Takafumi Iwaguchi,

Ryusuke Sagawa,

Hiroshi Kawasaki; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2026_CVPR, author = {Luo, Wenbin and Iwaguchi, Takafumi and Sagawa, Ryusuke and Kawasaki, Hiroshi}, title = {Revisiting Optimal Coding for I-ToF under Practical Sensor Constraints}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12501-12510} }
Scaling Test-Time Robustness of Vision-Language Models via Self-Critical Inference Framework: Kaihua Tang,

Jiaxin Qi,

Jinli Ou,

Yuhua Zheng,

Jianqiang Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR, author = {Tang, Kaihua and Qi, Jiaxin and Ou, Jinli and Zheng, Yuhua and Huang, Jianqiang}, title = {Scaling Test-Time Robustness of Vision-Language Models via Self-Critical Inference Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39361-39371} }
DPAR: Dynamic Patchification for Efficient Autoregressive Visual Generation: Divyansh Srivastava,

Akshay Mehra,

Pranav Maneriker,

Debopam Sanyal,

Vishnu Raj,

Vijay Kamarshi,

Fan Du,

Joshua Kimball; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Srivastava_2026_CVPR, author = {Srivastava, Divyansh and Mehra, Akshay and Maneriker, Pranav and Sanyal, Debopam and Raj, Vishnu and Kamarshi, Vijay and Du, Fan and Kimball, Joshua}, title = {DPAR: Dynamic Patchification for Efficient Autoregressive Visual Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23215-23226} }
TrackMAE: Video Representation Learning via Track Mask and Predict: Renaud Vandeghen,

Fida Mohammad Thoker,

Marc Van Droogenbroeck,

Bernard Ghanem; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vandeghen_2026_CVPR, author = {Vandeghen, Renaud and Thoker, Fida Mohammad and Van Droogenbroeck, Marc and Ghanem, Bernard}, title = {TrackMAE: Video Representation Learning via Track Mask and Predict}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13604-13614} }
Spectral Mixture-of-Experts for Continual Learning: Chen Yin,

Xingbo Dong,

Xuelin Shen,

Zhe Jin; [pdf] [supp]
[bibtex]
@InProceedings{Yin_2026_CVPR, author = {Yin, Chen and Dong, Xingbo and Shen, Xuelin and Jin, Zhe}, title = {Spectral Mixture-of-Experts for Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39972-39982} }
Learning Convex Decomposition via Feature Fields: Yuezhi Yang,

Qixing Huang,

Mikaela Angelina Uy,

Nicholas Sharp; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Yuezhi and Huang, Qixing and Uy, Mikaela Angelina and Sharp, Nicholas}, title = {Learning Convex Decomposition via Feature Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36190-36200} }
PrivSynth: Alternating and Control-Based Optimization for Privacy and Utility in Synthetic Data: Xinyuan Zhao,

Hanlin Gu,

Guibao Song,

Gongxi Zhu,

Yifei Zou,

Lixin Fan,

Yuxing Han; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Xinyuan and Gu, Hanlin and Song, Guibao and Zhu, Gongxi and Zou, Yifei and Fan, Lixin and Han, Yuxing}, title = {PrivSynth: Alternating and Control-Based Optimization for Privacy and Utility in Synthetic Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17430-17439} }
Nonparametric Deep Fine-grained Clustering with Low-Rank Guided Vision-Language Model: Xulun Ye,

Benyu Wu,

Jie Hong,

Kun Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR, author = {Ye, Xulun and Wu, Benyu and Hong, Jie and Zhou, Kun}, title = {Nonparametric Deep Fine-grained Clustering with Low-Rank Guided Vision-Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2434-2444} }
FreeScale: Scaling 3D Scenes via Certainty-Aware Free-View Generation: Chenhan Jiang,

Yu Chen,

Qingwen Zhang,

Jifei Song,

Songcen Xu,

Dit-Yan Yeung,

Jiankang Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR, author = {Jiang, Chenhan and Chen, Yu and Zhang, Qingwen and Song, Jifei and Xu, Songcen and Yeung, Dit-Yan and Deng, Jiankang}, title = {FreeScale: Scaling 3D Scenes via Certainty-Aware Free-View Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {330-340} }
CARE-Edit: Condition-Aware Routing of Experts for Contextual Image Editing: Yucheng Wang,

Zedong Wang,

Yuetong Wu,

Yue Ma,

Dan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Yucheng and Wang, Zedong and Wu, Yuetong and Ma, Yue and Xu, Dan}, title = {CARE-Edit: Condition-Aware Routing of Experts for Contextual Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9019-9028} }
PGA: Prior-free Generative Attack for Practical No-box Scenario: Hongyu Peng,

Xiang Yuan,

Gong Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Peng_2026_CVPR, author = {Peng, Hongyu and Yuan, Xiang and Cheng, Gong}, title = {PGA: Prior-free Generative Attack for Practical No-box Scenario}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13296-13305} }
Translating Signals to Languages for sEMG-Based Activity Recognition: Ming Wang,

Haoxuan Qu,

Qiuhong Ke,

Wei Zhou,

Hossein Rahmani,

Jun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Ming and Qu, Haoxuan and Ke, Qiuhong and Zhou, Wei and Rahmani, Hossein and Liu, Jun}, title = {Translating Signals to Languages for sEMG-Based Activity Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9317-9329} }
Beyond Binary Contrast: Modeling Continuous Skeleton Action Spaces with Transitional Anchors: Yingjie Feng,

Yi Wang,

Jiaze Wang,

Anfeng Liu,

Zhuotao Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR, author = {Feng, Yingjie and Wang, Yi and Wang, Jiaze and Liu, Anfeng and Tian, Zhuotao}, title = {Beyond Binary Contrast: Modeling Continuous Skeleton Action Spaces with Transitional Anchors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6075-6084} }
Enhancing Mixture-of-Experts Specialization via Cluster-Aware Upcycling: Sanghyeok Chu,

Pyunghwan Ahn,

Gwangmo Song,

Seung Hwan Kim,

Honglak Lee,

Bohyung Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chu_2026_CVPR, author = {Chu, Sanghyeok and Ahn, Pyunghwan and Song, Gwangmo and Kim, Seung Hwan and Lee, Honglak and Han, Bohyung}, title = {Enhancing Mixture-of-Experts Specialization via Cluster-Aware Upcycling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11283-11292} }
M4Human: A Large-Scale Multimodal mmWave Radar Benchmark for Human Mesh Reconstruction: Junqiao Fan,

Yunjiao Zhou,

Yizhuo Yang,

Xinyuan Cui,

Jiarui Zhang,

Lihua Xie,

Jianfei Yang,

Chris Xiaoxuan Lu,

Fangqiang Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR, author = {Fan, Junqiao and Zhou, Yunjiao and Yang, Yizhuo and Cui, Xinyuan and Zhang, Jiarui and Xie, Lihua and Yang, Jianfei and Lu, Chris Xiaoxuan and Ding, Fangqiang}, title = {M4Human: A Large-Scale Multimodal mmWave Radar Benchmark for Human Mesh Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42836-42846} }
Deep Feature Deformation Weights: Richard Liu,

Itai Lang,

Rana Hanocka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Richard and Lang, Itai and Hanocka, Rana}, title = {Deep Feature Deformation Weights}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27378-27387} }
ELiC: Efficient LiDAR Geometry Compression via Cross-Bit-depth Feature Propagation and Bag-of-Encoders: Junsik Kim,

Gun Bang,

Soowoong Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Junsik and Bang, Gun and Kim, Soowoong}, title = {ELiC: Efficient LiDAR Geometry Compression via Cross-Bit-depth Feature Propagation and Bag-of-Encoders}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39011-39020} }
PV-Ground: Text-Guided Point-Voxel Interaction for 3D Visual Grounding: Junpeng Shang,

Feifei Shao,

Jun Xiao,

Lin Li,

Hongwei Wang,

Dongfang Ma; [pdf] [supp]
[bibtex]
@InProceedings{Shang_2026_CVPR, author = {Shang, Junpeng and Shao, Feifei and Xiao, Jun and Li, Lin and Wang, Hongwei and Ma, Dongfang}, title = {PV-Ground: Text-Guided Point-Voxel Interaction for 3D Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38657-38667} }
StoryTailor: A Zero-Shot Pipeline for Action-Rich Multi-Subject Visual Narratives: Jinghao Hu,

Yuhe Zhang,

Guohua Geng,

Kang Li,

Han Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR, author = {Hu, Jinghao and Zhang, Yuhe and Geng, Guohua and Li, Kang and Zhang, Han}, title = {StoryTailor: A Zero-Shot Pipeline for Action-Rich Multi-Subject Visual Narratives}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21922-21931} }
PhysGS: Bayesian-Inferred Gaussian Splatting for Physical Property Estimation: Samarth Chopra,

Jing Liang,

Gershom Seneviratne,

Dinesh Manocha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chopra_2026_CVPR, author = {Chopra, Samarth and Liang, Jing and Seneviratne, Gershom and Manocha, Dinesh}, title = {PhysGS: Bayesian-Inferred Gaussian Splatting for Physical Property Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18980-18990} }
GeoBridge: A Semantic-Anchored Multi-View Foundation Model Bridging Images and Text for Geo-Localization: Zixuan Song,

Jing Zhang,

Di Wang,

Zidie Zhou,

Wenbin Liu,

Haonan Guo,

En Wang,

Bo Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR, author = {Song, Zixuan and Zhang, Jing and Wang, Di and Zhou, Zidie and Liu, Wenbin and Guo, Haonan and Wang, En and Du, Bo}, title = {GeoBridge: A Semantic-Anchored Multi-View Foundation Model Bridging Images and Text for Geo-Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27793-27803} }
Adapting a Pre-trained Single-Cell Foundation Model to Spatial Gene Expression Generation from Histology Images: Donghai Fang,

Yongheng Li,

Zhen Wang,

Yuansong Zeng,

Wenwen Min; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR, author = {Fang, Donghai and Li, Yongheng and Wang, Zhen and Zeng, Yuansong and Min, Wenwen}, title = {Adapting a Pre-trained Single-Cell Foundation Model to Spatial Gene Expression Generation from Histology Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5720-5729} }
Eliciting Complex Spatial Reasoning in MLLMs through Wide-Baseline Matching: Hao Zhong,

Muzhi Zhu,

Shenyan Zeng,

Anzhou Li,

Cong Chen,

Hua Geng,

Duochao Shi,

Wentao Ye,

Tao Lin,

Hao Chen,

Chunhua Shen; [pdf] [supp]
[bibtex]
@InProceedings{Zhong_2026_CVPR, author = {Zhong, Hao and Zhu, Muzhi and Zeng, Shenyan and Li, Anzhou and Chen, Cong and Geng, Hua and Shi, Duochao and Ye, Wentao and Lin, Tao and Chen, Hao and Shen, Chunhua}, title = {Eliciting Complex Spatial Reasoning in MLLMs through Wide-Baseline Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16768-16778} }
Learning Differentiable Hierarchies in 3D Gaussian Splatting: Youqi Pan,

Wugen Zhou,

Hongbin Zha; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2026_CVPR, author = {Pan, Youqi and Zhou, Wugen and Zha, Hongbin}, title = {Learning Differentiable Hierarchies in 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40939-40948} }
GraspLDP: Towards Generalizable Grasping Policy via Latent Diffusion: Enda Xiang,

Haoxiang Ma,

Xinzhu Ma,

Zicheng Liu,

Di Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2026_CVPR, author = {Xiang, Enda and Ma, Haoxiang and Ma, Xinzhu and Liu, Zicheng and Huang, Di}, title = {GraspLDP: Towards Generalizable Grasping Policy via Latent Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28032-28041} }
Attention, May I Have Your Decision? Localizing Generative Choices in Diffusion Models: Katarzyna Zaleska,

Łukasz Popek,

Monika Wysoczańska,

Kamil Deja; [pdf] [supp]
[bibtex]
@InProceedings{Zaleska_2026_CVPR, author = {Zaleska, Katarzyna and Popek, {\L}ukasz and Wysocza{\'n}ska, Monika and Deja, Kamil}, title = {Attention, May I Have Your Decision? Localizing Generative Choices in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30854-30863} }
Multi-speaker Attention Alignment for Multimodal Social Interaction: Liangyang Ouyang,

Yifei Huang,

Mingfang Zhang,

Caixin Kang,

Ryosuke Furuta,

Yoichi Sato; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ouyang_2026_CVPR, author = {Ouyang, Liangyang and Huang, Yifei and Zhang, Mingfang and Kang, Caixin and Furuta, Ryosuke and Sato, Yoichi}, title = {Multi-speaker Attention Alignment for Multimodal Social Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24608-24619} }
Underground Plant Exploration: Non-Destructive 3D Root Assessment with GPR Based on Point Graph Neural Network: Yuwei Zhou,

Guoyu Lu; [pdf]
[bibtex]
@InProceedings{Zhou_2026_CVPR, author = {Zhou, Yuwei and Lu, Guoyu}, title = {Underground Plant Exploration: Non-Destructive 3D Root Assessment with GPR Based on Point Graph Neural Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15616-15626} }
OmniDocLayout: Towards Diverse Document Layout Generation via Coarse-to-Fine LLM Learning: Hengrui Kang,

Zhuangcheng Gu,

Zhiyuan Zhao,

Zichen Wen,

Bin Wang,

Weijia Li,

Conghui He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR, author = {Kang, Hengrui and Gu, Zhuangcheng and Zhao, Zhiyuan and Wen, Zichen and Wang, Bin and Li, Weijia and He, Conghui}, title = {OmniDocLayout: Towards Diverse Document Layout Generation via Coarse-to-Fine LLM Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3208-3218} }
Few-for-Many Personalized Federated Learning: Ping Guo,

Tiantian Zhang,

Xi Lin,

Xiang Li,

Zhi-Ri Tang,

Qingfu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Ping and Zhang, Tiantian and Lin, Xi and Li, Xiang and Tang, Zhi-Ri and Zhang, Qingfu}, title = {Few-for-Many Personalized Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17515-17524} }
Video Generation with Stable Transparency via Shiftable RGB-A Distribution Learner: Haotian Dong,

Wenjing Wang,

Chen Li,

Jing Lyu,

Di Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2026_CVPR, author = {Dong, Haotian and Wang, Wenjing and Li, Chen and Lyu, Jing and Lin, Di}, title = {Video Generation with Stable Transparency via Shiftable RGB-A Distribution Learner}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1885-1894} }
Lenses: Toward Polysemous Vision-Language Understanding: Hani Alomari,

Ali Asgarov,

Chris Thomas; [pdf] [supp]
[bibtex]
@InProceedings{Alomari_2026_CVPR, author = {Alomari, Hani and Asgarov, Ali and Thomas, Chris}, title = {Lenses: Toward Polysemous Vision-Language Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37810-37820} }
Preference-Aligned LoRA Merging: Preserving Subspace Coverage and Addressing Directional Anisotropy: Wooseong Jeong,

Wonyoung Lee,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR, author = {Jeong, Wooseong and Lee, Wonyoung and Yoon, Kuk-Jin}, title = {Preference-Aligned LoRA Merging: Preserving Subspace Coverage and Addressing Directional Anisotropy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {823-835} }
Uni3R: Unified 3D Reconstruction and Semantic Understanding via Generalizable Gaussian Splatting from Unposed Multi-View Images: Xiangyu Sun,

Haoyi Jiang,

Liu Liu,

Seungtae Nam,

Gyeongjin Kang,

Xinjie Wang,

Wei Sui,

Zhizhong Su,

Wenyu Liu,

Xinggang Wang,

Eunbyung Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR, author = {Sun, Xiangyu and Jiang, Haoyi and Liu, Liu and Nam, Seungtae and Kang, Gyeongjin and Wang, Xinjie and Sui, Wei and Su, Zhizhong and Liu, Wenyu and Wang, Xinggang and Park, Eunbyung}, title = {Uni3R: Unified 3D Reconstruction and Semantic Understanding via Generalizable Gaussian Splatting from Unposed Multi-View Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33280-33290} }
HanDyVQA: A Video QA Benchmark for Fine-Grained Hand-Object Interaction Dynamics: Masatoshi Tateno,

Gido Kato,

Hirokatsu Kataoka,

Yoichi Sato,

Takuma Yagi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tateno_2026_CVPR, author = {Tateno, Masatoshi and Kato, Gido and Kataoka, Hirokatsu and Sato, Yoichi and Yagi, Takuma}, title = {HanDyVQA: A Video QA Benchmark for Fine-Grained Hand-Object Interaction Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3455-3465} }
Composite-Attribute Person Re-Identification via Pose-Guided Disentanglement: Kartik Patwari,

Noranart Vesdapunt,

Chien-Yi Wang,

Dawei Li,

Cong Phuoc Huynh,

Ning Zhou,

Chen-Nee Chuah,

Kah Kuen Fu; [pdf] [supp]
[bibtex]
@InProceedings{Patwari_2026_CVPR, author = {Patwari, Kartik and Vesdapunt, Noranart and Wang, Chien-Yi and Li, Dawei and Huynh, Cong Phuoc and Zhou, Ning and Chuah, Chen-Nee and Fu, Kah Kuen}, title = {Composite-Attribute Person Re-Identification via Pose-Guided Disentanglement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13812-13823} }
AnchorSplat: Feed-Forward 3D Gaussian Splatting With 3D Geometric Priors: Xiaoxue Zhang,

Xiaoxu Zheng,

Yixuan Yin,

Tiao Zhao,

Kaihua Tang,

Michael Bi Mi,

Zhan Xu,

Dave Zhenyu Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xiaoxue and Zheng, Xiaoxu and Yin, Yixuan and Zhao, Tiao and Tang, Kaihua and Mi, Michael Bi and Xu, Zhan and Chen, Dave Zhenyu}, title = {AnchorSplat: Feed-Forward 3D Gaussian Splatting With 3D Geometric Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18924-18933} }
Circular-DPO: Aligning Multi-Stage 3D Generative Models via Preference Feedback Loop: Zejian Li,

Jiarui Ma,

Han Xu,

Weiting Zheng,

Yangrui Zhu,

Chenye Meng,

Pei Chen,

Ling Yang,

Zhiyuan Yang,

Changyuan Yang,

Guang Yang,

Immanuel Koh,

Lingyun Sun; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Zejian and Ma, Jiarui and Xu, Han and Zheng, Weiting and Zhu, Yangrui and Meng, Chenye and Chen, Pei and Yang, Ling and Yang, Zhiyuan and Yang, Changyuan and Yang, Guang and Koh, Immanuel and Sun, Lingyun}, title = {Circular-DPO: Aligning Multi-Stage 3D Generative Models via Preference Feedback Loop}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32592-32601} }
YOLO-ULM: Ultra-Lightweight Models for Real-Time Object Detection: Shasha Han,

Chong Li,

Xinning Wang,

Xuebo Li; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR, author = {Han, Shasha and Li, Chong and Wang, Xinning and Li, Xuebo}, title = {YOLO-ULM: Ultra-Lightweight Models for Real-Time Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18418-18427} }
ProcessMaker: A Generalized Process Visualization Framework with Adaptive Sequence Steps on Diffusion Transformers: Mengling Xu,

Sisi You,

Yaning Li,

Bing-Kun Bao; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Mengling and You, Sisi and Li, Yaning and Bao, Bing-Kun}, title = {ProcessMaker: A Generalized Process Visualization Framework with Adaptive Sequence Steps on Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25699-25708} }
Align Images Before You Generate: Shihua Zhang,

Qiuhong Shen,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shihua and Shen, Qiuhong and Wang, Xinchao}, title = {Align Images Before You Generate}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30521-30531} }
Recover to Predict: Progressive Retrospective Learning for Variable-Length Trajectory Prediction: Hao Zhou,

Lu Qi,

Xiangtai Li,

Jie Zhang,

Yi Liu,

Xu Yang,

Mingyu Fan,

Fei Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR, author = {Zhou, Hao and Qi, Lu and Li, Xiangtai and Zhang, Jie and Liu, Yi and Yang, Xu and Fan, Mingyu and Luo, Fei}, title = {Recover to Predict: Progressive Retrospective Learning for Variable-Length Trajectory Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17863-17873} }
SpikeTrack: High-performance and Energy-efficient Event-Based Object Tracking with Spiking Neural Network: Yang Wang,

Jiqing Zhang,

Chuanyu Sun,

Qianhui Liu,

Huilin Ge,

Ziqi Wei,

Xin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Yang and Zhang, Jiqing and Sun, Chuanyu and Liu, Qianhui and Ge, Huilin and Wei, Ziqi and Yang, Xin}, title = {SpikeTrack: High-performance and Energy-efficient Event-Based Object Tracking with Spiking Neural Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {926-935} }
CoSMo3D: Open-World Promptable 3D Semantic Segmentation through LLM-Guided Canonical Spatial Modeling: Li Jin,

Weikai Chen,

Yujie Wang,

Yingda Yin,

Zeyu Hu,

Runze Zhang,

Keyang Luo,

Shengju Qian,

Xin Wang,

Xueying Qin; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2026_CVPR, author = {Jin, Li and Chen, Weikai and Wang, Yujie and Yin, Yingda and Hu, Zeyu and Zhang, Runze and Luo, Keyang and Qian, Shengju and Wang, Xin and Qin, Xueying}, title = {CoSMo3D: Open-World Promptable 3D Semantic Segmentation through LLM-Guided Canonical Spatial Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14325-14334} }
Stabilizing Streaming Video Geometry via Dynamic Feature Normalization: Xiaoyang Lyu,

Muxin Liu,

Xiaoshan Wu,

Ruicheng Wang,

Yi-Hua Huang,

Yang-Tian Sun,

Shaoshuai Shi,

Xiaojuan Qi; [pdf] [supp]
[bibtex]
@InProceedings{Lyu_2026_CVPR, author = {Lyu, Xiaoyang and Liu, Muxin and Wu, Xiaoshan and Wang, Ruicheng and Huang, Yi-Hua and Sun, Yang-Tian and Shi, Shaoshuai and Qi, Xiaojuan}, title = {Stabilizing Streaming Video Geometry via Dynamic Feature Normalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7577-7587} }
Learning to Solve PDEs on Neural Shape Representations: Lilian Welschinger,

Yilin Liu,

Zican Wang,

Niloy J. Mitra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Welschinger_2026_CVPR, author = {Welschinger, Lilian and Liu, Yilin and Wang, Zican and Mitra, Niloy J.}, title = {Learning to Solve PDEs on Neural Shape Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20263-20272} }
FantasyVLN: Unified Multimodal Chain-of-Thought Reasoning for Vision-and-Language Navigation: Jing Zuo,

Lingzhou Mu,

Fan Jiang,

Chengcheng Ma,

Mu Xu,

Yonggang Qi; [pdf] [supp]
[bibtex]
@InProceedings{Zuo_2026_CVPR, author = {Zuo, Jing and Mu, Lingzhou and Jiang, Fan and Ma, Chengcheng and Xu, Mu and Qi, Yonggang}, title = {FantasyVLN: Unified Multimodal Chain-of-Thought Reasoning for Vision-and-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29674-29683} }
Omni-Fake: Benchmarking Unified Multimodal Social Media Deepfake Detection: Tianxiao Li,

Zhenglin Huang,

Haiquan Wen,

Yiwei He,

Xinze Li,

Bingyu Zhu,

Wuhui Duan,

Congang Chen,

Zeyu Fu,

Yi Dong,

Baoyuan Wu,

Xiangtai Li,

Guangliang Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Tianxiao and Huang, Zhenglin and Wen, Haiquan and He, Yiwei and Li, Xinze and Zhu, Bingyu and Duan, Wuhui and Chen, Congang and Fu, Zeyu and Dong, Yi and Wu, Baoyuan and Li, Xiangtai and Cheng, Guangliang}, title = {Omni-Fake: Benchmarking Unified Multimodal Social Media Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30299-30311} }
Mimic Human Cognition, Master Multi-Image Reasoning: A Meta-Action Framework for Enhanced Visual Understanding: Jianghao Yin,

Qingbin Li,

Kun Sun,

Cheng Ding,

Jie Wang,

Qin Chen,

Jie Zhou,

Nan Wang,

Changqing Li,

Pei Wu,

Jian Xu,

Zheming Yang,

Liang He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR, author = {Yin, Jianghao and Li, Qingbin and Sun, Kun and Ding, Cheng and Wang, Jie and Chen, Qin and Zhou, Jie and Wang, Nan and Li, Changqing and Wu, Pei and Xu, Jian and Yang, Zheming and He, Liang}, title = {Mimic Human Cognition, Master Multi-Image Reasoning: A Meta-Action Framework for Enhanced Visual Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19253-19264} }
PoseGaussian: 6D Pose Estimation for Unseen Objects via Sparse-View Object-Level 3D Gaussian Splatting: Wubin Shi,

Shaoyan Gai,

Feipeng Da; [pdf]
[bibtex]
@InProceedings{Shi_2026_CVPR, author = {Shi, Wubin and Gai, Shaoyan and Da, Feipeng}, title = {PoseGaussian: 6D Pose Estimation for Unseen Objects via Sparse-View Object-Level 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4698-4707} }
ELV-Halluc: Benchmarking Semantic Aggregation Hallucinations in Video Understanding: Hao Lu,

Jiahao Wang,

Yaolun Zhang,

Ruohui Wang,

Xuanyu Zheng,

Yepeng Tang,

Dahua Lin,

Lewei Lu; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR, author = {Lu, Hao and Wang, Jiahao and Zhang, Yaolun and Wang, Ruohui and Zheng, Xuanyu and Tang, Yepeng and Lin, Dahua and Lu, Lewei}, title = {ELV-Halluc: Benchmarking Semantic Aggregation Hallucinations in Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32572-32581} }
AviaSafe: A Physics-Informed Data-Driven Model for Aviation Safety-Critical Cloud Forecasts: Zijian Zhu,

Qiusheng Huang,

Anboyu Guo,

Xiaohui Zhong,

Hao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR, author = {Zhu, Zijian and Huang, Qiusheng and Guo, Anboyu and Zhong, Xiaohui and Li, Hao}, title = {AviaSafe: A Physics-Informed Data-Driven Model for Aviation Safety-Critical Cloud Forecasts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33143-33152} }
Select, Hypothesize and Verify: Towards Verified Neuron Concept Interpretation: ZeBin Ji,

Yang Hu,

Xiuli Bi,

Bo Liu,

Bin Xiao; [pdf] [arXiv]
[bibtex]
@InProceedings{Ji_2026_CVPR, author = {Ji, ZeBin and Hu, Yang and Bi, Xiuli and Liu, Bo and Xiao, Bin}, title = {Select, Hypothesize and Verify: Towards Verified Neuron Concept Interpretation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31316-31325} }
M4-RAG: A Massive-Scale Multilingual Multi-Cultural Multimodal RAG: David Anugraha,

Patrick Amadeus Irawan,

Anshul Singh,

En-Shiun Annie Lee,

Genta Indra Winata; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Anugraha_2026_CVPR, author = {Anugraha, David and Irawan, Patrick Amadeus and Singh, Anshul and Lee, En-Shiun Annie and Winata, Genta Indra}, title = {M4-RAG: A Massive-Scale Multilingual Multi-Cultural Multimodal RAG}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23083-23094} }
SparseOIT: Improving Order-Independent Transparency 3DGS via Active Set Method: Wentao Yang,

Fanzhen Kong,

Zejian Kang,

Xiangru Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Wentao and Kong, Fanzhen and Kang, Zejian and Huang, Xiangru}, title = {SparseOIT: Improving Order-Independent Transparency 3DGS via Active Set Method}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41012-41021} }
Hypergraph-State Collaborative Reasoning for Multi-Object Tracking: Zikai Song,

Junqing Yu,

Yi-Ping Phoebe Chen,

Wei Yang,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Zikai and Yu, Junqing and Chen, Yi-Ping Phoebe and Yang, Wei and Wang, Xinchao},
  title     = {Hypergraph-State Collaborative Reasoning for Multi-Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28123--28133}
}
Simple Agents Outperform Experts in Biomedical Imaging Workflow Optimization: Xuefei Wang,

Kai Horstmann,

Ethan Lin,

Jonathan Chen,

Alexander Farhang,

Sophia Stiles,

Atharva Sehgal,

Jonathan Light,

David Van Valen,

Yisong Yue,

Jennifer J. Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xuefei and Horstmann, Kai and Lin, Ethan and Chen, Jonathan and Farhang, Alexander and Stiles, Sophia and Sehgal, Atharva and Light, Jonathan and Van Valen, David and Yue, Yisong and Sun, Jennifer J.},
  title     = {Simple Agents Outperform Experts in Biomedical Imaging Workflow Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13680--13690}
}
M^3KG-RAG: Multi-hop Multimodal Knowledge Graph-enhanced Retrieval-Augmented Generation: Hyeongcheol Park,

Jiyoung Seo,

Jaewon Mun,

Hogun Park,

Wonmin Byeon,

Sung June Kim,

Hyeonsoo Im,

JeungSub Lee,

Sangpil Kim; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Hyeongcheol and Seo, Jiyoung and Mun, Jaewon and Park, Hogun and Byeon, Wonmin and Kim, Sung June and Im, Hyeonsoo and Lee, JeungSub and Kim, Sangpil},
  title     = {{M$^3$KG-RAG}: Multi-hop Multimodal Knowledge Graph-enhanced Retrieval-Augmented Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14968--14978}
}
Guiding Token-Sparse Diffusion Models: Felix Krause,

Stefan Andreas Baumann,

Johannes Schusterbauer,

Olga Grebenkova,

Ming Gui,

Vincent Tao Hu,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Krause_2026_CVPR,
  author    = {Krause, Felix and Baumann, Stefan Andreas and Schusterbauer, Johannes and Grebenkova, Olga and Gui, Ming and Hu, Vincent Tao and Ommer, Bj{\"o}rn},
  title     = {Guiding Token-Sparse Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35789--35799}
}
Foundation Encoders Are All You Need for Preference-Aware Personalization: Hyungjin Kim,

Seokho Ahn,

Young-Duk Seo; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Hyungjin and Ahn, Seokho and Seo, Young-Duk},
  title     = {Foundation Encoders Are All You Need for Preference-Aware Personalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14692--14701}
}
SmokeSVD: Smoke Reconstruction from A Single View via Progressive Novel View Synthesis and Refinement with Diffusion Models: Chen Li,

Shanshan Dong,

Sheng Qiu,

Jianmin Han,

Yibo Zhao,

Zan Gao,

Taku Komura,

Kemeng Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Chen and Dong, Shanshan and Qiu, Sheng and Han, Jianmin and Zhao, Yibo and Gao, Zan and Komura, Taku and Huang, Kemeng},
  title     = {{SmokeSVD}: Smoke Reconstruction from A Single View via Progressive Novel View Synthesis and Refinement with Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7414--7424}
}
FedDAP: Domain-Aware Prototype Learning for Federated Learning under Domain Shift: Huy Q. Le,

Loc X. Nguyen,

Yu Qiao,

Seong Tae Kim,

Eui-Nam Huh,

Choong Seon Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Le_2026_CVPR,
  author    = {Le, Huy Q. and Nguyen, Loc X. and Qiao, Yu and Kim, Seong Tae and Huh, Eui-Nam and Hong, Choong Seon},
  title     = {{FedDAP}: Domain-Aware Prototype Learning for Federated Learning under Domain Shift},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3390--3399}
}
RealUnify: Do Unified Models Truly Benefit from Unification? A Comprehensive Benchmark: Yang Shi,

Yuhao Dong,

Yue Ding,

Yuran Wang,

Xuanyu Zhu,

Sheng Zhou,

Wenting Liu,

Haochen Tian,

Rundong Wang,

Huanqian Wang,

Zuyan Liu,

Bohan Zeng,

Ruizhe Chen,

Qixun Wang,

Zhuoran Zhang,

Xinlong Chen,

Chengzhuo Tong,

Bozhou Li,

Qiang Liu,

Haotian Wang,

Wenjing Yang,

Yuanxing Zhang,

Pengfei Wan,

Yi-Fan Zhang,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Yang and Dong, Yuhao and Ding, Yue and Wang, Yuran and Zhu, Xuanyu and Zhou, Sheng and Liu, Wenting and Tian, Haochen and Wang, Rundong and Wang, Huanqian and Liu, Zuyan and Zeng, Bohan and Chen, Ruizhe and Wang, Qixun and Zhang, Zhuoran and Chen, Xinlong and Tong, Chengzhuo and Li, Bozhou and Liu, Qiang and Wang, Haotian and Yang, Wenjing and Zhang, Yuanxing and Wan, Pengfei and Zhang, Yi-Fan and Liu, Ziwei},
  title     = {{RealUnify}: Do Unified Models Truly Benefit from Unification? {A} Comprehensive Benchmark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22488--22497}
}
ScenDi: 3D-to-2D Scene Diffusion Cascades for Urban Generation: Hanlei Guo,

Jiahao Shao,

Xinya Chen,

Xiyang Tan,

Sheng Miao,

Yujun Shen,

Yiyi Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Hanlei and Shao, Jiahao and Chen, Xinya and Tan, Xiyang and Miao, Sheng and Shen, Yujun and Liao, Yiyi},
  title     = {{ScenDi}: {3D-to-2D} Scene Diffusion Cascades for Urban Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40291--40302}
}
Grounded Chain-of-Thought for Multimodal Large Language Models: Qiong Wu,

Xiangcong Yang,

Yiyi Zhou,

Chenxin Fang,

Baiyang Song,

Xiaoshuai Sun,

Rongrong Ji; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Qiong and Yang, Xiangcong and Zhou, Yiyi and Fang, Chenxin and Song, Baiyang and Sun, Xiaoshuai and Ji, Rongrong},
  title     = {Grounded Chain-of-Thought for Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33577--33587}
}
MagicQuill V2: Precise and Interactive Image Editing with Layered Visual Cues: Zichen Liu,

Yue Yu,

Hao Ouyang,

Qiuyu Wang,

Shuailei Ma,

Ka Leong Cheng,

Wen Wang,

Qingyan Bai,

Yuxuan Zhang,

Yanhong Zeng,

Yixuan Li,

Xing Zhu,

Yujun Shen,

Qifeng Chen; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zichen and Yu, Yue and Ouyang, Hao and Wang, Qiuyu and Ma, Shuailei and Cheng, Ka Leong and Wang, Wen and Bai, Qingyan and Zhang, Yuxuan and Zeng, Yanhong and Li, Yixuan and Zhu, Xing and Shen, Yujun and Chen, Qifeng},
  title     = {{MagicQuill} {V2}: Precise and Interactive Image Editing with Layered Visual Cues},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22467--22477}
}
MambaLiteUNet: Cross-Gated Adaptive Feature Fusion for Robust Skin Lesion Segmentation: Md Maklachur Rahman,

Soon Ki Jung,

Tracy Hammond; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rahman_2026_CVPR,
  author    = {Rahman, Md Maklachur and Jung, Soon Ki and Hammond, Tracy},
  title     = {{MambaLiteUNet}: Cross-Gated Adaptive Feature Fusion for Robust Skin Lesion Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8556--8565}
}
Node-RF: Learning Generalized Continuous Space-Time Scene Dynamics with Neural ODE-based NeRFs: Hiran Sarkar,

Liming Kuang,

Yordanka Velikova,

Benjamin Busam; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sarkar_2026_CVPR,
  author    = {Sarkar, Hiran and Kuang, Liming and Velikova, Yordanka and Busam, Benjamin},
  title     = {{Node-RF}: Learning Generalized Continuous Space-Time Scene Dynamics with Neural {ODE}-based {NeRFs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15411--15420}
}
EventHub: Data Factory for Generalizable Event-Based Stereo Networks without Active Sensors: Luca Bartolomei,

Fabio Tosi,

Matteo Poggi,

Stefano Mattoccia,

Guillermo Gallego; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bartolomei_2026_CVPR,
  author    = {Bartolomei, Luca and Tosi, Fabio and Poggi, Matteo and Mattoccia, Stefano and Gallego, Guillermo},
  title     = {{EventHub}: Data Factory for Generalizable Event-Based Stereo Networks without Active Sensors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37063--37074}
}
Boosting Self-Supervised Tracking with Contextual Prompts and Noise Learning: Yaozong Zheng,

Qihua Liang,

Bineng Zhong,

Shuimu Zeng,

Yuanliang Xue,

Ning Li,

Shuxiang Song; [pdf] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Yaozong and Liang, Qihua and Zhong, Bineng and Zeng, Shuimu and Xue, Yuanliang and Li, Ning and Song, Shuxiang},
  title     = {Boosting Self-Supervised Tracking with Contextual Prompts and Noise Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35197--35206}
}
EI-Part: Explode for Completion and Implode for Refinement: Wanhu Sun,

Zhongjin Luo,

Heliang Zheng,

Jiahao Chang,

Chongjie Ye,

Huiang He,

Shengchu Zhao,

Rongfei Jia,

Xiaoguang Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Wanhu and Luo, Zhongjin and Zheng, Heliang and Chang, Jiahao and Ye, Chongjie and He, Huiang and Zhao, Shengchu and Jia, Rongfei and Han, Xiaoguang},
  title     = {{EI-Part}: Explode for Completion and Implode for Refinement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27007--27017}
}
UniGame: Turning a Unified Multimodal Model Into Its Own Adversary: Zhaolong Su,

Wang Lu,

Hao Chen,

Sharon Li,

Jindong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2026_CVPR,
  author    = {Su, Zhaolong and Lu, Wang and Chen, Hao and Li, Sharon and Wang, Jindong},
  title     = {{UniGame}: Turning a Unified Multimodal Model Into Its Own Adversary},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37632--37641}
}
Global Underwater Geolocation from Time-Lapse Polarization Imagery: Sara Aghajanzadeh,

Xiaoyang Bai,

Zhongmin Zhu,

David Forsyth,

Viktor Gruev; [pdf] [supp]
[bibtex]
@InProceedings{Aghajanzadeh_2026_CVPR,
  author    = {Aghajanzadeh, Sara and Bai, Xiaoyang and Zhu, Zhongmin and Forsyth, David and Gruev, Viktor},
  title     = {Global Underwater Geolocation from Time-Lapse Polarization Imagery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6464--6473}
}
Assignment-Driven Hash Learning in a Hyper-Semantic Space for On-the-Fly Category Discovery: Kaibing Yang,

Yucheng Wang,

Tingzhang Luo; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Kaibing and Wang, Yucheng and Luo, Tingzhang},
  title     = {Assignment-Driven Hash Learning in a Hyper-Semantic Space for On-the-Fly Category Discovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11303--11312}
}
MASQuant: Modality-Aware Smoothing Quantization for Multimodal Large Language Models: Lulu Hu,

Wenhu Xiao,

Xin Chen,

Xinhua Xu,

Bowen Xu,

Kun Li,

Yongliang Tao; [pdf] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Lulu and Xiao, Wenhu and Chen, Xin and Xu, Xinhua and Xu, Bowen and Li, Kun and Tao, Yongliang},
  title     = {{MASQuant}: Modality-Aware Smoothing Quantization for Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8707--8716}
}
CRAFT-LoRA: Content-Style Personalization via Rank-Constrained Adaptation and Training-Free Fusion: Yu Li,

Yujun Cai,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yu and Cai, Yujun and Zhang, Chi},
  title     = {{CRAFT-LoRA}: Content-Style Personalization via Rank-Constrained Adaptation and Training-Free Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7654--7663}
}
GS^2: Graph-based Spatial Distribution Optimization for Compact 3D Gaussian Splatting: Xianben Yang,

Tao Wang,

Yuxuan Li,

Yi Jin,

Haibin Ling; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xianben and Wang, Tao and Li, Yuxuan and Jin, Yi and Ling, Haibin},
  title     = {{GS$^2$}: Graph-based Spatial Distribution Optimization for Compact {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33259--33268}
}
PCA-Seg: Revisiting Cost Aggregation for Open-Vocabulary Semantic and Part Segmentation: Jianjian Yin,

Tao Chen,

Yi Chen,

Gensheng Pei,

Xiangbo Shu,

Yazhou Yao,

Fumin Shen; [pdf] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Jianjian and Chen, Tao and Chen, Yi and Pei, Gensheng and Shu, Xiangbo and Yao, Yazhou and Shen, Fumin},
  title     = {{PCA-Seg}: Revisiting Cost Aggregation for Open-Vocabulary Semantic and Part Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27633--27643}
}
Spatio-Temporal Difference Guided Motion Deblurring with the Complementary Vision Sensor: Yapeng Meng,

Lin Yang,

Yuguo Chen,

Xiangru Chen,

Taoyi Wang,

Lijian Wang,

Zheyu Yang,

Yihan Lin,

Rong Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meng_2026_CVPR,
  author    = {Meng, Yapeng and Yang, Lin and Chen, Yuguo and Chen, Xiangru and Wang, Taoyi and Wang, Lijian and Yang, Zheyu and Lin, Yihan and Zhao, Rong},
  title     = {Spatio-Temporal Difference Guided Motion Deblurring with the Complementary Vision Sensor},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37496--37506}
}
LaRP: Efficient Multi-View Inpainting with Latent Reprojection Priors: Gaoyang Zhang,

Xinguo Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Gaoyang and Liu, Xinguo},
  title     = {{LaRP}: Efficient Multi-View Inpainting with Latent Reprojection Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21772--21783}
}
Geometry-Aware Cross-Modal Graph Alignment for Referring Segmentation in 3D Gaussian Splatting: Yuwen Tao,

Kanglei Zhou,

Chang Li,

Liyuan Wang; [pdf]
[bibtex]
@InProceedings{Tao_2026_CVPR,
  author    = {Tao, Yuwen and Zhou, Kanglei and Li, Chang and Wang, Liyuan},
  title     = {Geometry-Aware Cross-Modal Graph Alignment for Referring Segmentation in {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20433--20442}
}
Predicting Spatial Transcriptomics from Histology Images via High-Order Multi-Cell Interaction Modeling: Youhan Sun,

Jiahua Rao,

Kangrui Du,

Jiancong Xie,

Yuedong Yang; [pdf]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Youhan and Rao, Jiahua and Du, Kangrui and Xie, Jiancong and Yang, Yuedong},
  title     = {Predicting Spatial Transcriptomics from Histology Images via High-Order Multi-Cell Interaction Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19781--19790}
}
Breaking Semantic Boundaries: Distribution-Guided Semantic Exploration for Creative Generation: Fu Feng,

Yucheng Xie,

Ruixiao Shi,

Xu Yang,

Jing Wang,

Xin Geng; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Fu and Xie, Yucheng and Shi, Ruixiao and Yang, Xu and Wang, Jing and Geng, Xin},
  title     = {Breaking Semantic Boundaries: Distribution-Guided Semantic Exploration for Creative Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14253--14262}
}
ResiHMR: Residual-Limb Aware Single-Image 3D Human Mesh Recovery for Individuals with Limb Loss: Jiaying Ying,

Heming Du,

Kaihao Zhang,

Sean M. Tweedy,

Xin Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ying_2026_CVPR,
  author    = {Ying, Jiaying and Du, Heming and Zhang, Kaihao and Tweedy, Sean M. and Yu, Xin},
  title     = {{ResiHMR}: Residual-Limb Aware Single-Image {3D} Human Mesh Recovery for Individuals with Limb Loss},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13940--13950}
}
RoboWheel: A Data Engine from Real-World Human Demonstrations for Cross-Embodiment Robotic Learning: Yuhong Zhang,

Zihan Gao,

Shengpeng Li,

Ling-Hao Chen,

Kaisheng Liu,

Runqing Cheng,

Xiao Lin,

Junjia Liu,

Zhuoheng Li,

Jingyi Feng,

Ziyan He,

Jintian Lin,

Zheyan Huang,

Zhifang Liu,

Haoqian Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yuhong and Gao, Zihan and Li, Shengpeng and Chen, Ling-Hao and Liu, Kaisheng and Cheng, Runqing and Lin, Xiao and Liu, Junjia and Li, Zhuoheng and Feng, Jingyi and He, Ziyan and Lin, Jintian and Huang, Zheyan and Liu, Zhifang and Wang, Haoqian},
  title     = {{RoboWheel}: A Data Engine from Real-World Human Demonstrations for Cross-Embodiment Robotic Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6664--6674}
}
Motus: A Unified Latent Action World Model: Hongzhe Bi,

Hengkai Tan,

Shenghao Xie,

Zeyuan Wang,

Shuhe Huang,

Haitian Liu,

Ruowen Zhao,

Yao Feng,

Chendong Xiang,

Yinze Rong,

Hongyan Zhao,

Hanyu Liu,

Zhizhong Su,

Lei Ma,

Hang Su,

Jun Zhu; [pdf] [arXiv]
[bibtex]
@InProceedings{Bi_2026_CVPR,
  author    = {Bi, Hongzhe and Tan, Hengkai and Xie, Shenghao and Wang, Zeyuan and Huang, Shuhe and Liu, Haitian and Zhao, Ruowen and Feng, Yao and Xiang, Chendong and Rong, Yinze and Zhao, Hongyan and Liu, Hanyu and Su, Zhizhong and Ma, Lei and Su, Hang and Zhu, Jun},
  title     = {Motus: A Unified Latent Action World Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35101--35113}
}
Neural Distribution Prior for LiDAR Out-of-Distribution Detection: Zizhao Li,

Zhengkang Xiang,

Jiayang Ao,

Feng Liu,

Joseph West,

Kourosh Khoshelham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zizhao and Xiang, Zhengkang and Ao, Jiayang and Liu, Feng and West, Joseph and Khoshelham, Kourosh},
  title     = {Neural Distribution Prior for {LiDAR} Out-of-Distribution Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3035--3045}
}
Echoes of Ownership: Adversarial-Guided Dual Injection for Copyright Protection in MLLMs: Chengwei Xia,

Fan Ma,

Ruijie Quan,

Yunqiu Xu,

Kun Zhan,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Chengwei and Ma, Fan and Quan, Ruijie and Xu, Yunqiu and Zhan, Kun and Yang, Yi},
  title     = {Echoes of Ownership: Adversarial-Guided Dual Injection for Copyright Protection in {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20650--20659}
}
Physics-Consistent Diffusion for Efficient Fluid Super-Resolution via Multiscale Residual Correction: Zhihao Li,

Shengwei Dong,

Chuang Yi,

Junxuan Gao,

Zhilu Lai,

Zhiqiang Liu,

Wei Wang,

Guangtao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhihao and Dong, Shengwei and Yi, Chuang and Gao, Junxuan and Lai, Zhilu and Liu, Zhiqiang and Wang, Wei and Zhang, Guangtao},
  title     = {Physics-Consistent Diffusion for Efficient Fluid Super-Resolution via Multiscale Residual Correction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30574--30583}
}
Physical Adversarial Clothing Evades Visible-Thermal Detectors via Non-Overlapping RGB-T Pattern: Xiaopei Zhu,

Guanning Zeng,

Zhanhao Hu,

Jun Zhu,

Xiaolin Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Xiaopei and Zeng, Guanning and Hu, Zhanhao and Zhu, Jun and Hu, Xiaolin},
  title     = {Physical Adversarial Clothing Evades Visible-Thermal Detectors via Non-Overlapping {RGB-T} Pattern},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13356--13365}
}
LazyVAR: Accelerating Visual Autoregressive Models via Scale-wise Token Pruning and Parallel Group Decoding: Rongge Mao,

Chengqi Dong,

S Kevin Zhou; [pdf]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Rongge and Dong, Chengqi and Zhou, S Kevin},
  title     = {{LazyVAR}: Accelerating Visual Autoregressive Models via Scale-wise Token Pruning and Parallel Group Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12129--12139}
}
HiSpatial: Taming Hierarchical 3D Spatial Understanding in Vision-Language Models: Huizhi Liang,

Yichao Shen,

Yu Deng,

Sicheng Xu,

ZhiYuan Feng,

Tong Zhang,

Yaobo Liang,

Jiaolong Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Huizhi and Shen, Yichao and Deng, Yu and Xu, Sicheng and Feng, ZhiYuan and Zhang, Tong and Liang, Yaobo and Yang, Jiaolong},
  title     = {{HiSpatial}: Taming Hierarchical {3D} Spatial Understanding in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2502--2514}
}
GenColorBench: A Color Evaluation Benchmark for Text-to-Image Generation: Muhammad Atif Butt,

Alexandra Gomez-Villa,

Tao Wu,

Javier Vazquez-Corral,

Joost Van De Weijer,

Kai Wang; [pdf] [supp]
[bibtex]
@InProceedings{Butt_2026_CVPR,
  author    = {Butt, Muhammad Atif and Gomez-Villa, Alexandra and Wu, Tao and Vazquez-Corral, Javier and Van De Weijer, Joost and Wang, Kai},
  title     = {{GenColorBench}: A Color Evaluation Benchmark for Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36638--36648}
}
CI-VID: A Coherent Interleaved Text-Video Dataset: Yiming Ju,

Jijin Hu,

Zhengxiong Luo,

Haoge Deng,

Hanyu Zhao,

Li Du,

Wenbo Xiao,

Chengwei Wu,

Donglin Hao,

Xinlong Wang,

Tengfei Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ju_2026_CVPR,
  author    = {Ju, Yiming and Hu, Jijin and Luo, Zhengxiong and Deng, Haoge and Zhao, Hanyu and Du, Li and Xiao, Wenbo and Wu, Chengwei and Hao, Donglin and Wang, Xinlong and Pan, Tengfei},
  title     = {{CI-VID}: A Coherent Interleaved Text-Video Dataset},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25568--25577}
}
FedRAC: Rolling Submodel Allocation for Collaborative Fairness in Federated Learning: Zihui Wang,

Yuhang Fu,

Mengmeng Du,

Zhimin Yuan,

Yachen Liu,

Weisheng Liao,

Kaiyu Wang,

Zheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zihui and Fu, Yuhang and Du, Mengmeng and Yuan, Zhimin and Liu, Yachen and Liao, Weisheng and Wang, Kaiyu and Wang, Zheng},
  title     = {{FedRAC}: Rolling Submodel Allocation for Collaborative Fairness in Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31802--31811}
}
SpeeDiff: Scalable Pixel-Anchored End-to-End Latent Diffusion Model: Bingliang Zhang,

Wenda Chu,

Yizhuo Li,

Linjie Yang,

Yisong Yue,

Katherine Bouman,

Yang Song,

Qiushan Guo; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Bingliang and Chu, Wenda and Li, Yizhuo and Yang, Linjie and Yue, Yisong and Bouman, Katherine and Song, Yang and Guo, Qiushan},
  title     = {{SpeeDiff}: Scalable Pixel-Anchored End-to-End Latent Diffusion Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35893--35903}
}
PoseMaster: A Unified 3D Native Framework for Stylized Pose Generation: Hongyu Yan,

Kunming Luo,

Weiyu Li,

Kaiyi Zhang,

Yixun Liang,

Jingwei Huang,

Chunchao Guo,

Ping Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Hongyu and Luo, Kunming and Li, Weiyu and Zhang, Kaiyi and Liang, Yixun and Huang, Jingwei and Guo, Chunchao and Tan, Ping},
  title     = {{PoseMaster}: A Unified {3D} Native Framework for Stylized Pose Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34292--34302}
}
AdaSVD: Singular Value Decomposition with Adaptive Mechanisms for Large Multimodal Models: Zhiteng Li,

Mingyuan Xia,

Jingyuan Zhang,

Zheng Hui,

Haotong Qin,

Linghe Kong,

Yulun Zhang,

Xiaokang Yang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhiteng and Xia, Mingyuan and Zhang, Jingyuan and Hui, Zheng and Qin, Haotong and Kong, Linghe and Zhang, Yulun and Yang, Xiaokang},
  title     = {{AdaSVD}: Singular Value Decomposition with Adaptive Mechanisms for Large Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26358--26368}
}
EmoThinker: Advancing Visual-Acoustic Emotion Analysis via Structural Token Selection and Chain-of-Thought Reasoning: Qinfu Xu,

Liyuan Pan,

Yiwei Wei,

Shaozu Yuan,

Jiaqi Chen,

Tianyu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Qinfu and Pan, Liyuan and Wei, Yiwei and Yuan, Shaozu and Chen, Jiaqi and Liu, Tianyu},
  title     = {{EmoThinker}: Advancing Visual-Acoustic Emotion Analysis via Structural Token Selection and Chain-of-Thought Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1672--1682}
}
ARC Is a Vision Problem!: Keya Hu,

Ali Cy,

Linlu Qiu,

Xiaoman Delores Ding,

Runqian Wang,

Yeyin Eva Zhu,

Jacob Andreas,

Kaiming He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Keya and Cy, Ali and Qiu, Linlu and Ding, Xiaoman Delores and Wang, Runqian and Zhu, Yeyin Eva and Andreas, Jacob and He, Kaiming},
  title     = {{ARC} Is a Vision Problem!},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2537--2546}
}
Reinforce to Learn, Elect to Reason: A Dual Paradigm for Video Reasoning: Songyuan Yang,

Weijiang Yu,

Jilin Ma,

Ziyu Liu,

Guijian Tang,

Wenjing Yang,

Huibin Tan,

Nong Xiao; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Songyuan and Yu, Weijiang and Ma, Jilin and Liu, Ziyu and Tang, Guijian and Yang, Wenjing and Tan, Huibin and Xiao, Nong},
  title     = {Reinforce to Learn, Elect to Reason: A Dual Paradigm for Video Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33794--33804}
}
CineBrain: A Large-Scale Multi-Modal Audiovisual Brain Dataset for Brain-Conditioned Video Generation: Jianxiong Gao,

Yichang Liu,

Baofeng Yang,

Jianfeng Feng,

Yanwei Fu; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Jianxiong and Liu, Yichang and Yang, Baofeng and Feng, Jianfeng and Fu, Yanwei},
  title     = {{CineBrain}: A Large-Scale Multi-Modal Audiovisual Brain Dataset for Brain-Conditioned Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36224--36234}
}
VIRST: Video-Instructed Reasoning Assistant for SpatioTemporal Segmentation: Jihwan Hong,

Jaeyoung Do; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Jihwan and Do, Jaeyoung},
  title     = {{VIRST}: Video-Instructed Reasoning Assistant for {SpatioTemporal} Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3410--3420}
}
Beyond Prompt Degradation: Prototype-guided Dual-pool Prompting for Incremental Object Detection: Yaoteng Zhang,

Qing Zhou,

Junyu Gao,

Qi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yaoteng and Zhou, Qing and Gao, Junyu and Wang, Qi},
  title     = {Beyond Prompt Degradation: Prototype-guided Dual-pool Prompting for Incremental Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27568--27578}
}
MergeVLA: Cross-Skill Model Merging Toward a Generalist Vision-Language-Action Agent: Yuxia Fu,

Zhizhen Zhang,

Yuqi Zhang,

Zijian Wang,

Zi Huang,

Yadan Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Yuxia and Zhang, Zhizhen and Zhang, Yuqi and Wang, Zijian and Huang, Zi and Luo, Yadan},
  title     = {{MergeVLA}: Cross-Skill Model Merging Toward a Generalist Vision-Language-Action Agent},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22335--22347}
}
HieraMamba: Video Temporal Grounding via Hierarchical Anchor-Mamba Pooling: Joungbin An,

Kristen Grauman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2026_CVPR,
  author    = {An, Joungbin and Grauman, Kristen},
  title     = {{HieraMamba}: Video Temporal Grounding via Hierarchical Anchor-{Mamba} Pooling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16954--16965}
}
WeDetect: Fast Open-Vocabulary Object Detection as Retrieval: Shenghao Fu,

Yukun Su,

Fengyun Rao,

Jing LYU,

Xiaohua Xie,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Shenghao and Su, Yukun and Rao, Fengyun and LYU, Jing and Xie, Xiaohua and Zheng, Wei-Shi},
  title     = {{WeDetect}: Fast Open-Vocabulary Object Detection as Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20377--20387}
}
Latent Implicit Visual Reasoning: Kelvin Li,

Chuyi Shang,

Leonid Karlinsky,

Rogerio Feris,

Trevor Darrell,

Roei Herzig; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Kelvin and Shang, Chuyi and Karlinsky, Leonid and Feris, Rogerio and Darrell, Trevor and Herzig, Roei}, title = {Latent Implicit Visual Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33457-33466} }
Guardians of the Hair: Rescuing Soft Boundaries in Depth, Stereo, and Novel Views: Xiang Zhang,

Yang Zhang,

Lukas Mehl,

Markus Gross,

Christopher Schroers; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xiang and Zhang, Yang and Mehl, Lukas and Gross, Markus and Schroers, Christopher}, title = {Guardians of the Hair: Rescuing Soft Boundaries in Depth, Stereo, and Novel Views}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19822-19832} }
Real-Time Dynamic Scene Rendering with Controlled Compressibility and Contact Awareness: Boya Shi,

Naiyang Guan,

Xiaodong Yi; [pdf]
[bibtex]
@InProceedings{Shi_2026_CVPR, author = {Shi, Boya and Guan, Naiyang and Yi, Xiaodong}, title = {Real-Time Dynamic Scene Rendering with Controlled Compressibility and Contact Awareness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8308-8318} }
Complementary Prototype Mapping for Efficient Multimodal Anomaly Detection: Yuan Zhao,

Xiaoqin Zhang,

Huchuan Lu,

Lihe Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yuan and Zhang, Xiaoqin and Lu, Huchuan and Zhang, Lihe}, title = {Complementary Prototype Mapping for Efficient Multimodal Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14178-14187} }
Content-Adaptive Hierarchical Hyperprior for Neural Video Coding: Junqi Liao,

Yaojun Wu,

Chaoyi Lin,

Zhipin Deng,

Li Li,

Dong Liu,

Xiaoyan Sun; [pdf]
[bibtex]
@InProceedings{Liao_2026_CVPR, author = {Liao, Junqi and Wu, Yaojun and Lin, Chaoyi and Deng, Zhipin and Li, Li and Liu, Dong and Sun, Xiaoyan}, title = {Content-Adaptive Hierarchical Hyperprior for Neural Video Coding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20953-20962} }
Rethinking Prompt Design for Inference-time Scaling in Text-to-Visual Generation: Subin Kim,

Sangwoo Mo,

Mamshad Nayeem Rizve,

Yiran Xu,

Difan Liu,

Jinwoo Shin,

Tobias Hinz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Subin and Mo, Sangwoo and Rizve, Mamshad Nayeem and Xu, Yiran and Liu, Difan and Shin, Jinwoo and Hinz, Tobias}, title = {Rethinking Prompt Design for Inference-time Scaling in Text-to-Visual Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22090-22099} }
Fractal Camouflage: A Bio-Inspired Approach for Multi-Scale Adversarial Attacks in the Infrared Domain: Chengyin Hu,

Xin Wang,

Rui Qiu,

Zhe Jia,

Yingying Zhao,

Kai Wang,

Xu Kang,

Yiwei Wei; [pdf]
[bibtex]
@InProceedings{Hu_2026_CVPR, author = {Hu, Chengyin and Wang, Xin and Qiu, Rui and Jia, Zhe and Zhao, Yingying and Wang, Kai and Kang, Xu and Wei, Yiwei}, title = {Fractal Camouflage: A Bio-Inspired Approach for Multi-Scale Adversarial Attacks in the Infrared Domain}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34981-34990} }
TempR1: Improving Temporal Understanding of MLLMs via Temporal-Aware Multi-Task Reinforcement Learning: Tao Wu,

Li Yang,

Gen Zhan,

Yabin Zhang,

Yiting Liao,

Junlin Li,

Deliang Fu,

Li Zhang,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Tao and Yang, Li and Zhan, Gen and Zhang, Yabin and Liao, Yiting and Li, Junlin and Fu, Deliang and Zhang, Li and Wang, Limin}, title = {TempR1: Improving Temporal Understanding of MLLMs via Temporal-Aware Multi-Task Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2756-2767} }
Finding Distributed Object-Centric Properties in Self-Supervised Transformers: Samyak Rawlekar,

Amitabh Swain,

Yujun Cai,

Yiwei Wang,

Ming-Hsuan Yang,

Narendra Ahuja; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rawlekar_2026_CVPR, author = {Rawlekar, Samyak and Swain, Amitabh and Cai, Yujun and Wang, Yiwei and Yang, Ming-Hsuan and Ahuja, Narendra}, title = {Finding Distributed Object-Centric Properties in Self-Supervised Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31326-31335} }
DisCa: Accelerating Video Diffusion Transformers with Distillation-Compatible Learnable Feature Caching: Chang Zou,

Changlin Li,

Songtao Liu,

Zhao Zhong,

Kailin Huang,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zou_2026_CVPR, author = {Zou, Chang and Li, Changlin and Liu, Songtao and Zhong, Zhao and Huang, Kailin and Zhang, Linfeng}, title = {DisCa: Accelerating Video Diffusion Transformers with Distillation-Compatible Learnable Feature Caching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4590-4601} }
Spike-driven Discrete Aggregation for Event-based Object Detection: Huaning Li,

Ziming Wang,

Runhao Jiang,

Yan Rui,

Huajin Tang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Huaning and Wang, Ziming and Jiang, Runhao and Rui, Yan and Tang, Huajin}, title = {Spike-driven Discrete Aggregation for Event-based Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15135-15144} }
KnowVal: A Knowledge-Augmented and Value-Guided Autonomous Driving System: Zhongyu Xia,

Wenhao Chen,

Yongtao Wang,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR, author = {Xia, Zhongyu and Chen, Wenhao and Wang, Yongtao and Yang, Ming-Hsuan}, title = {KnowVal: A Knowledge-Augmented and Value-Guided Autonomous Driving System}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3740-3749} }
Reframing Long-Tailed Learning via Loss Landscape Geometry: Shenghan Chen,

Yiming Liu,

Yanzhen Wang,

Yujia Wang,

Xiankai Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Shenghan and Liu, Yiming and Wang, Yanzhen and Wang, Yujia and Lu, Xiankai}, title = {Reframing Long-Tailed Learning via Loss Landscape Geometry}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22227-22237} }
EfficientMonoHair: Fast Strand-Level Reconstruction from Monocular Video via Multi-View Direction Fusion: Da Li,

Dominik Engel,

Deng Luo,

Ivan Viola; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Da and Engel, Dominik and Luo, Deng and Viola, Ivan}, title = {EfficientMonoHair: Fast Strand-Level Reconstruction from Monocular Video via Multi-View Direction Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7610-7619} }
OSMO: Open-vocabulary Self-eMOtion Tracking: Mohamed Abdelfattah,

Bugra Tekin,

Fadime Sener,

Necati Cihan Camgoz,

Eric Sauser,

Shugao Ma,

Alexandre Alahi,

Edoardo Remelli; [pdf]
[bibtex]
@InProceedings{Abdelfattah_2026_CVPR, author = {Abdelfattah, Mohamed and Tekin, Bugra and Sener, Fadime and Camgoz, Necati Cihan and Sauser, Eric and Ma, Shugao and Alahi, Alexandre and Remelli, Edoardo}, title = {OSMO: Open-vocabulary Self-eMOtion Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1737-1748} }
DyaDiT: A Multi-Modal Diffusion Transformer for Socially Favorable Dyadic Gesture Generation: Yichen Peng,

Jyun-Ting Song,

Siyeol Jung,

Ruofan Liu,

Haiyang Liu,

Xuangeng Chu,

Ruicong Liu,

Erwin Wu,

Hideki Koike,

Kris Kitani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR, author = {Peng, Yichen and Song, Jyun-Ting and Jung, Siyeol and Liu, Ruofan and Liu, Haiyang and Chu, Xuangeng and Liu, Ruicong and Wu, Erwin and Koike, Hideki and Kitani, Kris}, title = {DyaDiT: A Multi-Modal Diffusion Transformer for Socially Favorable Dyadic Gesture Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10932-10942} }
KLIP: Localized Distribution Shift Detection via KL-Divergence with Diffusion Priors in Inverse Problems: Alireza Kheirandish,

Jihoon Hong,

Sara Fridovich-Keil; [pdf] [supp]
[bibtex]
@InProceedings{Kheirandish_2026_CVPR, author = {Kheirandish, Alireza and Hong, Jihoon and Fridovich-Keil, Sara}, title = {KLIP: Localized Distribution Shift Detection via KL-Divergence with Diffusion Priors in Inverse Problems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30823-30832} }
Precise Object and Effect Removal with Adaptive Target-Aware Attention: Jixin Zhao,

Zhouxia Wang,

Peiqing Yang,

Shangchen Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Jixin and Wang, Zhouxia and Yang, Peiqing and Zhou, Shangchen}, title = {Precise Object and Effect Removal with Adaptive Target-Aware Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19370-19379} }
Scaling Up AI-Generated Image Detection with Generator-Aware Prototypes: Ziheng Qin,

Yuheng Ji,

Renshuai Tao,

Yuxuan Tian,

Yuyang Liu,

Yipu Wang,

Xiaolong Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2026_CVPR, author = {Qin, Ziheng and Ji, Yuheng and Tao, Renshuai and Tian, Yuxuan and Liu, Yuyang and Wang, Yipu and Zheng, Xiaolong}, title = {Scaling Up AI-Generated Image Detection with Generator-Aware Prototypes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43008-43017} }
ViKey: Enhancing Temporal Understanding in Videos via Visual Prompting: Yeonkyung Lee,

Dayun Ju,

Youngmin Kim,

Seil Kang,

Seong Jae Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR, author = {Lee, Yeonkyung and Ju, Dayun and Kim, Youngmin and Kang, Seil and Hwang, Seong Jae}, title = {ViKey: Enhancing Temporal Understanding in Videos via Visual Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38880-38890} }
CMR-RD: Long-Tailed Adaptive VLM for Explainable CMR Diagnosis: Yansong Li,

Zhongxi Qiu,

Yun Tian,

Zheng Jinyu,

Shuo Li; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Yansong and Qiu, Zhongxi and Tian, Yun and Jinyu, Zheng and Li, Shuo}, title = {CMR-RD: Long-Tailed Adaptive VLM for Explainable CMR Diagnosis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7003-7013} }
EgoEdit: Dataset, Real-Time Streaming Model, and Benchmark for Egocentric Video Editing: Runjia Li,

Moayed Haji-Ali,

Ashkan Mirzaei,

Chaoyang Wang,

Arpit Sahni,

Ivan Skorokhodov,

Aliaksandr Siarohin,

Tomas Jakab,

Junlin Han,

Sergey Tulyakov,

Philip Torr,

Willi Menapace; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Runjia and Haji-Ali, Moayed and Mirzaei, Ashkan and Wang, Chaoyang and Sahni, Arpit and Skorokhodov, Ivan and Siarohin, Aliaksandr and Jakab, Tomas and Han, Junlin and Tulyakov, Sergey and Torr, Philip and Menapace, Willi}, title = {EgoEdit: Dataset, Real-Time Streaming Model, and Benchmark for Egocentric Video Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16042-16053} }
SceMoS: Scene-Aware 3D Human Motion Synthesis by Planning with Geometry-Grounded Tokens: Anindita Ghosh,

Vladislav Golyanik,

Taku Komura,

Philipp Slusallek,

Christian Theobalt,

Rishabh Dabral; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghosh_2026_CVPR, author = {Ghosh, Anindita and Golyanik, Vladislav and Komura, Taku and Slusallek, Philipp and Theobalt, Christian and Dabral, Rishabh}, title = {SceMoS: Scene-Aware 3D Human Motion Synthesis by Planning with Geometry-Grounded Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16443-16453} }
Extending Embodied Question Answering from Perception to Decision: Xicheng Gong,

Qiwei Li,

Peiran Xu,

Yadong Mu; [pdf] [supp]
[bibtex]
@InProceedings{Gong_2026_CVPR, author = {Gong, Xicheng and Li, Qiwei and Xu, Peiran and Mu, Yadong}, title = {Extending Embodied Question Answering from Perception to Decision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29567-29577} }
TriDF: Evaluating Perception, Detection, and Hallucination for Interpretable DeepFake Detection: Jian-Yu Jiang-Lin,

Kang-Yang Huang,

Ling Zou,

Ling Lo,

Sheng-Ping Yang,

Yu-Wen Tseng,

Kun-Hsiang Lin,

Chia-Ling Chen,

Yu-Ting Ta,

Yan-Tsung Wang,

Po-Ching Chen,

Hongxia Xie,

Hong-Han Shuai,

Wen-Huang Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Jiang-Lin_2026_CVPR, author = {Jiang-Lin, Jian-Yu and Huang, Kang-Yang and Zou, Ling and Lo, Ling and Yang, Sheng-Ping and Tseng, Yu-Wen and Lin, Kun-Hsiang and Chen, Chia-Ling and Ta, Yu-Ting and Wang, Yan-Tsung and Chen, Po-Ching and Xie, Hongxia and Shuai, Hong-Han and Cheng, Wen-Huang}, title = {TriDF: Evaluating Perception, Detection, and Hallucination for Interpretable DeepFake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17087-17098} }
Re-evaluating Continual VQA: Toward Fair and Robust Evaluation for Multimodal Continual Learning: Zijian Gao,

Zicheng Sun,

Xingxing Zhang,

Kele Xu,

Huaimin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR, author = {Gao, Zijian and Sun, Zicheng and Zhang, Xingxing and Xu, Kele and Wang, Huaimin}, title = {Re-evaluating Continual VQA: Toward Fair and Robust Evaluation for Multimodal Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18021-18031} }
RefTon: Reference person shot assist virtual Try-on: Liuzhuozheng Li,

Yue Gong,

Shanyuan Liu,

Zanyi Wang,

Dengyang Jiang,

Leibucha Wu,

Bo Cheng,

Yuhang Ma,

Dawei Leng,

Yuhui Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Liuzhuozheng and Gong, Yue and Liu, Shanyuan and Wang, Zanyi and Jiang, Dengyang and Wu, Leibucha and Cheng, Bo and Ma, Yuhang and Leng, Dawei and Yin, Yuhui}, title = {RefTon: Reference person shot assist virtual Try-on}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14915-14925} }
Let VLMs Grade Their Own Thoughts: A Self-Quantification Approach to Reasoning-Aware Reward Modeling: Xing Xi,

Yu Qiu,

Ronghua Luo,

Peixian Chen,

Peilin Tong; [pdf] [supp]
[bibtex]
@InProceedings{Xi_2026_CVPR, author = {Xi, Xing and Qiu, Yu and Luo, Ronghua and Chen, Peixian and Tong, Peilin}, title = {Let VLMs Grade Their Own Thoughts: A Self-Quantification Approach to Reasoning-Aware Reward Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26487-26496} }
PSR: Scaling Multi-Subject Personalized Image Generation with Pairwise Subject-Consistency Rewards: Shulei Wang,

Longhui Wei,

Xin He,

Jianbo Ouyang,

Hui Lu,

Zhou Zhao,

Qi Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Shulei and Wei, Longhui and He, Xin and Ouyang, Jianbo and Lu, Hui and Zhao, Zhou and Tian, Qi}, title = {PSR: Scaling Multi-Subject Personalized Image Generation with Pairwise Subject-Consistency Rewards}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14756-14766} }
Gaze Target Estimation Anywhere with Concepts: Xu Cao,

Houze Yang,

Vipin Gunda,

Zhongyi Zhou,

Tianyu Xu,

Adarsh Kowdle,

Inki Kim,

James M. Rehg; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2026_CVPR, author = {Cao, Xu and Yang, Houze and Gunda, Vipin and Zhou, Zhongyi and Xu, Tianyu and Kowdle, Adarsh and Kim, Inki and Rehg, James M.}, title = {Gaze Target Estimation Anywhere with Concepts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31304-31315} }
UniMMAD: Unified Multi-Modal and Multi-Class Anomaly Detection via MoE-Driven Feature Decompression: Yuan Zhao,

Youwei Pang,

Lihe Zhang,

Hanqi Liu,

Jiaming Zuo,

Huchuan Lu,

Xiaoqi Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yuan and Pang, Youwei and Zhang, Lihe and Liu, Hanqi and Zuo, Jiaming and Lu, Huchuan and Zhao, Xiaoqi}, title = {UniMMAD: Unified Multi-Modal and Multi-Class Anomaly Detection via MoE-Driven Feature Decompression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28502-28511} }
CausalLens: Sensitivity-Guided Multi-Head Causal Intervention for Hallucination Mitigation in Large Vision-Language Models: Junyang Ji,

Qifan Liu,

Wenming Yang,

Zhihai He; [pdf] [supp]
[bibtex]
@InProceedings{Ji_2026_CVPR, author = {Ji, Junyang and Liu, Qifan and Yang, Wenming and He, Zhihai}, title = {CausalLens: Sensitivity-Guided Multi-Head Causal Intervention for Hallucination Mitigation in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4199-4209} }
Where MLLMs Attend and What They Rely On: Explaining Autoregressive Token Generation: Ruoyu Chen,

Xiaoqing Guo,

Kangwei Liu,

Siyuan Liang,

Shiming Liu,

Qunli Zhang,

Laiyuan Wang,

Hua Zhang,

Xiaochun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Ruoyu and Guo, Xiaoqing and Liu, Kangwei and Liang, Siyuan and Liu, Shiming and Zhang, Qunli and Wang, Laiyuan and Zhang, Hua and Cao, Xiaochun}, title = {Where MLLMs Attend and What They Rely On: Explaining Autoregressive Token Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17057-17066} }
Generalizable Radio-Frequency Radiance Fields for Spatial Spectrum Synthesis: Kang Yang,

Yuning Chen,

Wan Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Kang and Chen, Yuning and Du, Wan}, title = {Generalizable Radio-Frequency Radiance Fields for Spatial Spectrum Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12533-12543} }
SymphoMotion: Joint Control of Camera Motion and Object Dynamics for Coherent Video Generation: Guiyu Zhang,

Yabo Chen,

Xunzhi Xiang,

Junchao Huang,

Zhongyu Wang,

Li Jiang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Guiyu and Chen, Yabo and Xiang, Xunzhi and Huang, Junchao and Wang, Zhongyu and Jiang, Li}, title = {SymphoMotion: Joint Control of Camera Motion and Object Dynamics for Coherent Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11127-11137} }
GraspALL: Adaptive Structural Compensation from Illumination Variation for Robotic Garment Grasping in Any Low-Light Conditions: Haifeng Zhong,

Wenshuo Han,

Zhouyu Wang,

Runyang Feng,

Fan Tang,

Tong-Yee Lee,

Zipei Fan,

Ruihai Wu,

Yuran Wang,

Hao Dong,

Hechang Chen,

Hyung Jin Chang,

Yixing Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2026_CVPR, author = {Zhong, Haifeng and Han, Wenshuo and Wang, Zhouyu and Feng, Runyang and Tang, Fan and Lee, Tong-Yee and Fan, Zipei and Wu, Ruihai and Wang, Yuran and Dong, Hao and Chen, Hechang and Chang, Hyung Jin and Gao, Yixing}, title = {GraspALL: Adaptive Structural Compensation from Illumination Variation for Robotic Garment Grasping in Any Low-Light Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6631-6641} }
Tell Model Where to Look: Mitigating Hallucinations in MLLMs by Vision-Guided Attention: Jianfei Zhao,

Feng Zhang,

Xin Sun,

Chong Feng,

Zhixing Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Jianfei and Zhang, Feng and Sun, Xin and Feng, Chong and Tan, Zhixing}, title = {Tell Model Where to Look: Mitigating Hallucinations in MLLMs by Vision-Guided Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32582-32591} }
Measure The Feature Universe: Topology-based Pseudo Labeling and Gravity Consistency for Source-Free Domain Adaptation: Jae Yun Lee,

Hyeok Nam,

Sung In Cho; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR, author = {Lee, Jae Yun and Nam, Hyeok and Cho, Sung In}, title = {Measure The Feature Universe: Topology-based Pseudo Labeling and Gravity Consistency for Source-Free Domain Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3617-3626} }
Boosting Vision-Language Models Towards Cross-Domain Incremental Object Detection: Xu Wang,

Zihan Lin,

Yixin Zhang,

Zilei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Xu and Lin, Zihan and Zhang, Yixin and Wang, Zilei}, title = {Boosting Vision-Language Models Towards Cross-Domain Incremental Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6249-6260} }
Pico-Banana-400K: A Large-Scale Dataset for Text-Guided Image Editing: Yusu Qian,

Eli Bocek-Rivele,

Liangchen Song,

Jialing Tong,

Yinfei Yang,

Jiasen Lu,

Wenze Hu,

Zhe Gan; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2026_CVPR, author = {Qian, Yusu and Bocek-Rivele, Eli and Song, Liangchen and Tong, Jialing and Yang, Yinfei and Lu, Jiasen and Hu, Wenze and Gan, Zhe}, title = {Pico-Banana-400K: A Large-Scale Dataset for Text-Guided Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37226-37235} }
D2Cache: Second-Order Delta Caching for Higher Video Diffusion Acceleration: Enhuai Liu,

Yunke Wang,

Changming Sun,

Chang Xu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Enhuai and Wang, Yunke and Sun, Changming and Xu, Chang}, title = {D2Cache: Second-Order Delta Caching for Higher Video Diffusion Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43589-43599} }
StructXLIP: Enhancing Vision-language Models with Multimodal Structural Cues: Zanxi Ruan,

Songqun Gao,

Qiuyu Kong,

Yiming Wang,

Marco Cristani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ruan_2026_CVPR, author = {Ruan, Zanxi and Gao, Songqun and Kong, Qiuyu and Wang, Yiming and Cristani, Marco}, title = {StructXLIP: Enhancing Vision-language Models with Multimodal Structural Cues}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17292-17302} }
Cross-Slice Knowledge Transfer via Masked Multi-Modal Heterogeneous Graph Contrastive Learning for Spatial Gene Expression Inference: Zhiceng Shi,

Changmiao Wang,

Jun Wan,

Wenwen Min; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR, author = {Shi, Zhiceng and Wang, Changmiao and Wan, Jun and Min, Wenwen}, title = {Cross-Slice Knowledge Transfer via Masked Multi-Modal Heterogeneous Graph Contrastive Learning for Spatial Gene Expression Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5710-5719} }
Deformation-based In-Context Learning for Point Cloud Understanding: Chengxing Lin,

Jinhong Deng,

Yinjie Lei,

Wen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR, author = {Lin, Chengxing and Deng, Jinhong and Lei, Yinjie and Li, Wen}, title = {Deformation-based In-Context Learning for Point Cloud Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39001-39010} }
SemVideo: Reconstructs What You Watch from Brain Activity via Hierarchical Semantic Guidance: Minghan Yang,

Lan Yang,

Ke Li,

Honggang Zhang,

Kaiyue Pang,

Yi-Zhe Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Minghan and Yang, Lan and Li, Ke and Zhang, Honggang and Pang, Kaiyue and Song, Yi-Zhe}, title = {SemVideo: Reconstructs What You Watch from Brain Activity via Hierarchical Semantic Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13658-13669} }
M3DocDep: Multi-modal, Multi-page, Multi-document Dependency Chunking with Large Vision-Language Models: Joongmin Shin,

Jeongbae Park,

Jaehyung Seo,

Heuiseok Lim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shin_2026_CVPR, author = {Shin, Joongmin and Park, Jeongbae and Seo, Jaehyung and Lim, Heuiseok}, title = {M3DocDep: Multi-modal, Multi-page, Multi-document Dependency Chunking with Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16603-16613} }
Differentiable Laplacian Matrix Guided Superpixel Segmentation: Jeremy Juybari,

Josh Hamilton,

Shuvra Das,

Chaofan Chen,

Andre Khalil,

Yifeng Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Juybari_2026_CVPR, author = {Juybari, Jeremy and Hamilton, Josh and Das, Shuvra and Chen, Chaofan and Khalil, Andre and Zhu, Yifeng}, title = {Differentiable Laplacian Matrix Guided Superpixel Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36168-36178} }
3M-TI: High-Quality Mobile Thermal Imaging via Calibration-free Multi-Camera Cross-Modal Diffusion: Minchong Chen,

Xiaoyun Yuan,

Junzhe Wan,

Jianing Zhang,

Jun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Minchong and Yuan, Xiaoyun and Wan, Junzhe and Zhang, Jianing and Zhang, Jun}, title = {3M-TI: High-Quality Mobile Thermal Imaging via Calibration-free Multi-Camera Cross-Modal Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5659-5669} }
PoseAnything: General Pose-guided Video Generation with Part-aware Temporal Coherence: Ruiyan Wang,

Teng Hu,

Kaihui Huang,

Zihan Su,

Ran Yi,

Lizhuang Ma; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Ruiyan and Hu, Teng and Huang, Kaihui and Su, Zihan and Yi, Ran and Ma, Lizhuang}, title = {PoseAnything: General Pose-guided Video Generation with Part-aware Temporal Coherence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23193-23203} }
Content-Aware Dynamic Patchification for Efficient Video Diffusion: Sheng Li,

Connelly Barnes,

Mamshad Nayeem Rizve,

Hongwu Peng,

Zhengang Li,

Ohi Dibua,

Alireza Ganjdanesh,

Xulong Tang,

Yan Kang,

Yifan Gong; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Sheng and Barnes, Connelly and Rizve, Mamshad Nayeem and Peng, Hongwu and Li, Zhengang and Dibua, Ohi and Ganjdanesh, Alireza and Tang, Xulong and Kang, Yan and Gong, Yifan}, title = {Content-Aware Dynamic Patchification for Efficient Video Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35936-35945} }
Object-WIPER: Training-Free Object and Associated Effect Removal in Videos: Saksham Singh Kushwaha,

Sayan Nag,

Yapeng Tian,

Kuldeep Kulkarni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kushwaha_2026_CVPR, author = {Kushwaha, Saksham Singh and Nag, Sayan and Tian, Yapeng and Kulkarni, Kuldeep}, title = {Object-WIPER: Training-Free Object and Associated Effect Removal in Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38071-38080} }
Learning to Adapt: Self-Improving Web Agent via Cognitive-Aware Exploration: Weile Chen,

Bingchen Miao,

Qifan Yu,

Wendong Bu,

Guoming Wang,

Wenqiao Zhang,

Shengyu Zhang,

Juncheng Li,

Siliang Tang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Weile and Miao, Bingchen and Yu, Qifan and Bu, Wendong and Wang, Guoming and Zhang, Wenqiao and Zhang, Shengyu and Li, Juncheng and Tang, Siliang}, title = {Learning to Adapt: Self-Improving Web Agent via Cognitive-Aware Exploration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22402-22411} }
EarlyTom: Early Token Compression Completes Fast Video Understanding: Hesong Wang,

Xin Jin,

Lu Lu,

Chenhaowen Li,

Jian Chen,

Qiang Liu,

Huan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Hesong and Jin, Xin and Lu, Lu and Li, Chenhaowen and Chen, Jian and Liu, Qiang and Wang, Huan}, title = {EarlyTom: Early Token Compression Completes Fast Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40559-40568} }
Black-Box Domain Adaptation for Object Detection with Retention-Driven Knowledge Compression: Yuwu Lu,

Chunzhi Liu; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Yuwu and Liu, Chunzhi},
  title     = {Black-Box Domain Adaptation for Object Detection with Retention-Driven Knowledge Compression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {801--811},
}
Muses: Designing, Composing, Generating Nonexistent Fantasy 3D Creatures without Training: Hexiao Lu,

Xiaokun Sun,

Zeyu Cai,

Hao Guo,

Ying Tai,

Jian Yang,

Zhenyu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Hexiao and Sun, Xiaokun and Cai, Zeyu and Guo, Hao and Tai, Ying and Yang, Jian and Zhang, Zhenyu},
  title     = {Muses: Designing, Composing, Generating Nonexistent Fantasy {3D} Creatures without Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26995--27006},
}
ReaGEN: Adaptive Generation of Structured Chains-of-Thought for Efficient Multimodal Reasoning: Ruiqing Tian,

Mohan Sai Singamsetti,

Di Niu,

Bahador Rashidi; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2026_CVPR,
  author    = {Tian, Ruiqing and Singamsetti, Mohan Sai and Niu, Di and Rashidi, Bahador},
  title     = {{ReaGEN}: Adaptive Generation of Structured Chains-of-Thought for Efficient Multimodal Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33521--33530},
}
High Resolution Neural Video Coding with Bi-directional Confidence-Guided Reference Information Modeling: Feng Ye,

Kai Zhang,

Li Zhang,

Chuanmin Jia; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Feng and Zhang, Kai and Zhang, Li and Jia, Chuanmin},
  title     = {High Resolution Neural Video Coding with Bi-directional Confidence-Guided Reference Information Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33652--33661},
}
WorldStereo: Bridging Camera-Guided Video Generation and Scene Reconstruction via 3D Geometric Memories: Yisu Zhang,

Chenjie Cao,

Tengfei Wang,

Xuhui Zuo,

Junta Wu,

Jianke Zhu,

Chunchao Guo; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yisu and Cao, Chenjie and Wang, Tengfei and Zuo, Xuhui and Wu, Junta and Zhu, Jianke and Guo, Chunchao},
  title     = {{WorldStereo}: Bridging Camera-Guided Video Generation and Scene Reconstruction via {3D} Geometric Memories},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40327--40339},
}
CoFiDA-M: Concept-Aware Feature Modulation for Cross-Domain Adaptation with Image-Only Inference: Nurjahan Sultana,

Moi Hoon Yap,

Xinqi Fan,

Wenqi Lu; [pdf] [supp]
[bibtex]
@InProceedings{Sultana_2026_CVPR,
  author    = {Sultana, Nurjahan and Yap, Moi Hoon and Fan, Xinqi and Lu, Wenqi},
  title     = {{CoFiDA-M}: Concept-Aware Feature Modulation for Cross-Domain Adaptation with Image-Only Inference},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15060--15069},
}
GSV2X: Geometry-Aware Uncertainty Modeling and Orthogonal Fusion for Robust Roadside Perception: Jianqiang Xu,

Gensheng Pei,

Huafeng Liu,

Yazhou Yao; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jianqiang and Pei, Gensheng and Liu, Huafeng and Yao, Yazhou},
  title     = {{GSV2X}: Geometry-Aware Uncertainty Modeling and Orthogonal Fusion for Robust Roadside Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21409--21419},
}
FoundIR-v2: Optimizing Pre-Training Data Mixtures for Image Restoration Foundation Model: Xiang Chen,

Jinshan Pan,

Jiangxin Dong,

Jian Yang,

Jinhui Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xiang and Pan, Jinshan and Dong, Jiangxin and Yang, Jian and Tang, Jinhui},
  title     = {{FoundIR-v2}: Optimizing Pre-Training Data Mixtures for Image Restoration Foundation Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8471--8480},
}
Learning Coordinate-based Convolutional Kernels for Continuous SE(3) Equivariant and Efficient Point Cloud Analysis: Jaein Kim,

Hee Bin Yoo,

Dong-Sig Han,

Byoung-Tak Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jaein and Yoo, Hee Bin and Han, Dong-Sig and Zhang, Byoung-Tak},
  title     = {Learning Coordinate-based Convolutional Kernels for Continuous {SE(3)} Equivariant and Efficient Point Cloud Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9986--9995},
}
PRISM: Learning a Shared Primitive Space for Transferable Skeleton Action Representation: Di Yang,

Yaohui Wang,

Shuai Shao,

François Brémond,

Jiangtao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Di and Wang, Yaohui and Shao, Shuai and Br{\'e}mond, Fran{\c{c}}ois and Wang, Jiangtao},
  title     = {{PRISM}: Learning a Shared Primitive Space for Transferable Skeleton Action Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6085--6094},
}
SALMUBench: A Benchmark for Sensitive Association-Level Multimodal Unlearning: Cai Selvas-Sala,

Lei Kang,

Lluis Gomez; [pdf] [supp]
[bibtex]
@InProceedings{Selvas-Sala_2026_CVPR,
  author    = {Selvas-Sala, Cai and Kang, Lei and Gomez, Lluis},
  title     = {{SALMUBench}: A Benchmark for Sensitive Association-Level Multimodal Unlearning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39351--39360},
}
Stay in your Lane: Role Specific Queries with Overlap Suppression Loss for Dense Video Captioning: Seung Hyup Baek,

Jimin Lee,

Hyeongkeun Lee,

Jae Won Cho; [pdf] [arXiv]
[bibtex]
@InProceedings{Baek_2026_CVPR,
  author    = {Baek, Seung Hyup and Lee, Jimin and Lee, Hyeongkeun and Cho, Jae Won},
  title     = {Stay in your Lane: Role Specific Queries with Overlap Suppression Loss for Dense Video Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3432--3442},
}
MusicInfuser: Making Video Diffusion Listen and Dance: Susung Hong,

Ira Kemelmacher-Shlizerman,

Brian Curless,

Steven M. Seitz; [pdf] [supp]
[bibtex]
@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Susung and Kemelmacher-Shlizerman, Ira and Curless, Brian and Seitz, Steven M.},
  title     = {{MusicInfuser}: Making Video Diffusion Listen and Dance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43751--43761},
}
HumanVBench: Probing Human-Centric Video Understanding in MLLMs with Automatically Synthesized Benchmarks: Ting Zhou,

Daoyuan Chen,

Qirui Jiao,

Bolin Ding,

Yaliang Li,

Ying Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Ting and Chen, Daoyuan and Jiao, Qirui and Ding, Bolin and Li, Yaliang and Shen, Ying},
  title     = {{HumanVBench}: Probing Human-Centric Video Understanding in {MLLMs} with Automatically Synthesized Benchmarks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4494--4504},
}
Group Diffusion: Enhancing Image Generation by Unlocking Cross-Sample Collaboration: Sicheng Mo,

Thao Nguyen,

Richard Zhang,

Nick Kolkin,

Siddharth Srinivasan Iyer,

Eli Shechtman,

Krishna Kumar Singh,

Yong Jae Lee,

Bolei Zhou,

Yuheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mo_2026_CVPR,
  author    = {Mo, Sicheng and Nguyen, Thao and Zhang, Richard and Kolkin, Nick and Iyer, Siddharth Srinivasan and Shechtman, Eli and Singh, Krishna Kumar and Lee, Yong Jae and Zhou, Bolei and Li, Yuheng},
  title     = {Group Diffusion: Enhancing Image Generation by Unlocking Cross-Sample Collaboration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35702--35712},
}
MoRe: Motion-aware Feed-forward 4D Reconstruction Transformer: Juntong Fang,

Zequn Chen,

Weiqi Zhang,

Donglin Di,

Xuancheng Zhang,

Chengmin Yang,

Yu-Shen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Juntong and Chen, Zequn and Zhang, Weiqi and Di, Donglin and Zhang, Xuancheng and Yang, Chengmin and Liu, Yu-Shen},
  title     = {{MoRe}: Motion-aware Feed-forward {4D} Reconstruction Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28914--28924},
}
SIMPLEPOSTER: A SIMPLE BASELINE FOR PRODUCT POSTER GENERATION: Benlei Cui,

Fangao Zeng,

Weitao Jiang,

Yuwen Zhai,

Haiwen Hong,

Longtao Huang,

Hui Xue,

Wenxiang Shang,

Pipei Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Benlei and Zeng, Fangao and Jiang, Weitao and Zhai, Yuwen and Hong, Haiwen and Huang, Longtao and Xue, Hui and Shang, Wenxiang and Huang, Pipei},
  title     = {{SIMPLEPOSTER}: A Simple Baseline for Product Poster Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14854--14863},
}
Simple-ViLMedSAM: Simple Text Prompts Meet Vision-Language Models for Medical Image Segmentation: Chengcan Qian,

Dong Nie,

Geng Chen,

Daoqiang Zhang,

Xuyun Wen; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Chengcan and Nie, Dong and Chen, Geng and Zhang, Daoqiang and Wen, Xuyun},
  title     = {{Simple-ViLMedSAM}: Simple Text Prompts Meet Vision-Language Models for Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30042--30052},
}
Towards Policy-Adaptive Image Guardrail: Benchmark and Method: Caiyong Piao,

Zhiyuan Yan,

Haoming Xu,

Yunzhen Zhao,

Kaiqing Lin,

Feiyang Xu,

Shuigeng Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Piao_2026_CVPR,
  author    = {Piao, Caiyong and Yan, Zhiyuan and Xu, Haoming and Zhao, Yunzhen and Lin, Kaiqing and Xu, Feiyang and Zhou, Shuigeng},
  title     = {Towards Policy-Adaptive Image Guardrail: Benchmark and Method},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16614--16623},
}
SpikeTrack: A Spike-driven Framework for Efficient Visual Tracking: Qiuyang Zhang,

Jiujun Cheng,

Qichao Mao,

Cong Liu,

Yu Fang,

Yuhong Li,

Mengying Ge,

Shangce Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Qiuyang and Cheng, Jiujun and Mao, Qichao and Liu, Cong and Fang, Yu and Li, Yuhong and Ge, Mengying and Gao, Shangce},
  title     = {{SpikeTrack}: A Spike-driven Framework for Efficient Visual Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6802--6811},
}
Beyond Single-View Sufficiency: CVBench for Cross-View Human Understanding: Tianchen Guo,

Chen Liu,

Xin Yu; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Tianchen and Liu, Chen and Yu, Xin},
  title     = {Beyond Single-View Sufficiency: {CVBench} for Cross-View Human Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7154--7164},
}
Adaptive Data Augmentation with Multi-armed Bandit: Sample-Efficient Embedding Calibration for Implicit Pattern Recognition: Minxue Tang,

Yangyang Yu,

Aolin Ding,

Maziyar Baran Pouyan,

Taha Belkhouja,

Yujia Bao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Minxue and Yu, Yangyang and Ding, Aolin and Pouyan, Maziyar Baran and Belkhouja, Taha and Bao, Yujia},
  title     = {Adaptive Data Augmentation with Multi-armed Bandit: Sample-Efficient Embedding Calibration for Implicit Pattern Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7978--7989},
}
Energy-GS: Image Energy-guided Pose Alignment Gaussian Splatting with redesigned pose gradient flow: Yu Gao,

Lutong Su,

Ruixiang Huang,

Tianji Jiang,

Jiadong Tang,

Yufeng Yue,

Yi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yu and Su, Lutong and Huang, Ruixiang and Jiang, Tianji and Tang, Jiadong and Yue, Yufeng and Yang, Yi},
  title     = {{Energy-GS}: Image Energy-guided Pose Alignment {Gaussian} Splatting with redesigned pose gradient flow},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7310--7319},
}
Sparsity as a Key: Unlocking New Insights from Latent Structures for Out-of-Distribution Detection: Ahyoung Oh,

Wonseok Shin,

Songkuk Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Oh_2026_CVPR,
  author    = {Oh, Ahyoung and Shin, Wonseok and Kim, Songkuk},
  title     = {Sparsity as a Key: Unlocking New Insights from Latent Structures for Out-of-Distribution Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19298--19307},
}
TIPSv2: Advancing Vision-Language Pretraining with Enhanced Patch-Text Alignment: Bingyi Cao,

Koert Chen,

Kevis-Kokitsi Maninis,

Kaifeng Chen,

Arjun Karpur,

Ye Xia,

Sahil Dua,

Tanmaya Dabral,

Guangxing Han,

Bohyung Han,

Joshua Ainslie,

Alex Bewley,

Mithun Jacob,

René Wagner,

Washington Ramos,

Krzysztof Choromanski,

Mojtaba Seyedhosseini,

Howard Zhou,

Andre Araujo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Bingyi and Chen, Koert and Maninis, Kevis-Kokitsi and Chen, Kaifeng and Karpur, Arjun and Xia, Ye and Dua, Sahil and Dabral, Tanmaya and Han, Guangxing and Han, Bohyung and Ainslie, Joshua and Bewley, Alex and Jacob, Mithun and Wagner, Ren{\'e} and Ramos, Washington and Choromanski, Krzysztof and Seyedhosseini, Mojtaba and Zhou, Howard and Araujo, Andre},
  title     = {{TIPSv2}: Advancing Vision-Language Pretraining with Enhanced Patch-Text Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29325--29335},
}
NAMI: Efficient Image Generation via Bridged Progressive Rectified Flow Transformers: Yuhang Ma,

Bo Cheng,

Shanyuan Liu,

Hongyi Zhou,

Liebucha Wu,

Dawei Leng,

Yuhui Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Yuhang and Cheng, Bo and Liu, Shanyuan and Zhou, Hongyi and Wu, Liebucha and Leng, Dawei and Yin, Yuhui},
  title     = {{NAMI}: Efficient Image Generation via Bridged Progressive Rectified Flow Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25667--25676},
}
Temporal Equilibrium MeanFlow: Bridging the Scale Gap for One-Step Generation: Yuanpeng Tu,

Yunpeng Chen,

Xinyu Zhang,

Chao Liao,

Hengshuang Zhao; [pdf]
[bibtex]
@InProceedings{Tu_2026_CVPR,
  author    = {Tu, Yuanpeng and Chen, Yunpeng and Zhang, Xinyu and Liao, Chao and Zhao, Hengshuang},
  title     = {Temporal Equilibrium {MeanFlow}: Bridging the Scale Gap for One-Step Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16064--16073},
}
Mostly Text, Smart Visuals: Asymmetric Text-Visual Pruning for Large Vision-Language Models: Sijie Li,

Biao Qian,

Jungong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Sijie and Qian, Biao and Han, Jungong},
  title     = {Mostly Text, Smart Visuals: Asymmetric Text-Visual Pruning for Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10472--10481},
}
Remedying Target-Domain Astigmatism for Cross-Domain Few-Shot Object Detection: Yongwei Jiang,

Yixiong Zou,

Yuhua Li,

Ruixuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Yongwei and Zou, Yixiong and Li, Yuhua and Li, Ruixuan},
  title     = {Remedying Target-Domain Astigmatism for Cross-Domain Few-Shot Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19580--19590},
}
The LLM Bottleneck: Why Open-Source Vision LLMs Struggle with Hierarchical Visual Recognition: Yuwen Tan,

Yuan Qing,

Boqing Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Yuwen and Qing, Yuan and Gong, Boqing},
  title     = {The {LLM} Bottleneck: Why Open-Source Vision {LLMs} Struggle with Hierarchical Visual Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38532--38543},
}
Test-Time Alignment of Text-to-Image Diffusion Models via Null-Text Embedding Optimisation: Taehoon Kim,

Henry Gouk,

Timothy Hospedales; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Taehoon and Gouk, Henry and Hospedales, Timothy},
  title     = {Test-Time Alignment of Text-to-Image Diffusion Models via Null-Text Embedding Optimisation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43611--43620},
}
FedHarmony: Harmonizing Heterogeneous Label Correlations in Federated Multi-Label Learning: Zhiqiang Kou,

Junxiang Wu,

Wenke Huang,

Wenwen He,

Ming-Kun Xie,

Changwei Wang,

Yuheng Jia,

Di Jiang,

Yang Liu,

Xin Geng,

Qiang Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Kou_2026_CVPR,
  author    = {Kou, Zhiqiang and Wu, Junxiang and Huang, Wenke and He, Wenwen and Xie, Ming-Kun and Wang, Changwei and Jia, Yuheng and Jiang, Di and Liu, Yang and Geng, Xin and Yang, Qiang},
  title     = {{FedHarmony}: Harmonizing Heterogeneous Label Correlations in Federated Multi-Label Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10324--10334},
}
SPEAR-1: Scaling Beyond Robot Demonstrations via 3D Understanding: Nikolay Nikolov,

Giuliano Albanese,

Sombit Dey,

Aleksandar Yanev,

Luc Van Gool,

Jan-Nico Zaech,

Danda Pani Paudel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nikolov_2026_CVPR,
  author    = {Nikolov, Nikolay and Albanese, Giuliano and Dey, Sombit and Yanev, Aleksandar and Van Gool, Luc and Zaech, Jan-Nico and Paudel, Danda Pani},
  title     = {{SPEAR-1}: Scaling Beyond Robot Demonstrations via {3D} Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35124--35134},
}
MuViT: Multi-Resolution Vision Transformers for Learning Across Scales in Microscopy: Albert Dominguez Mantes,

Gioele La Manno,

Martin Weigert; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mantes_2026_CVPR,
  author    = {Dominguez Mantes, Albert and La Manno, Gioele and Weigert, Martin},
  title     = {{MuViT}: Multi-Resolution Vision Transformers for Learning Across Scales in Microscopy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13648--13657},
}
Stealing Split Learning Bottom Models by Recovering Embedding Geometry: Qinbo Zhang,

Yanhang Shi,

Ziyi Zhang,

Hao Wang,

Sai Qian Zhang,

Jian Li; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Qinbo and Shi, Yanhang and Zhang, Ziyi and Wang, Hao and Zhang, Sai Qian and Li, Jian},
  title     = {Stealing Split Learning Bottom Models by Recovering Embedding Geometry},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20660--20669},
}
SuP: Sub-cloud Driven Point Cloud Registration: Sheldon Fung,

Wei Pan,

Ling Cao,

Fei Hou,

Ling Chen,

Shasha Mao,

Hongdong Li,

Xuequan Lu; [pdf] [supp]
[bibtex]
@InProceedings{Fung_2026_CVPR,
  author    = {Fung, Sheldon and Pan, Wei and Cao, Ling and Hou, Fei and Chen, Ling and Mao, Shasha and Li, Hongdong and Lu, Xuequan},
  title     = {{SuP}: Sub-cloud Driven Point Cloud Registration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24185--24194},
}
Low-Resolution Editing is All You Need for High-Resolution Editing: Junsung Lee,

Hyunsoo Lee,

Yong Jae Lee,

Bohyung Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Junsung and Lee, Hyunsoo and Lee, Yong Jae and Han, Bohyung},
  title     = {Low-Resolution Editing is All You Need for High-Resolution Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16216--16225},
}
When Token Pruning is Worse than Random: Understanding Visual Token Information in VLLMs: Yahong Wang,

Juncheng Wu,

Zhangkai Ni,

Longzhen Yang,

Yihang Liu,

Chengmei Yang,

Ying Wen,

Lianghua He,

Xianfeng Tang,

Hui Liu,

Yuyin Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yahong and Wu, Juncheng and Ni, Zhangkai and Yang, Longzhen and Liu, Yihang and Yang, Chengmei and Wen, Ying and He, Lianghua and Tang, Xianfeng and Liu, Hui and Zhou, Yuyin},
  title     = {When Token Pruning is Worse than Random: Understanding Visual Token Information in {VLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31910--31919},
}
Parse, Search, and Confirmation: Training-Free Aerial Vision-and-Dialog Navigation with Chain-of-Thought Reasoning and Structured Spatial Memory: Yu Qi,

Hongyu Li,

Shaofei Huang,

Tianrui Hui,

Yaxiong Wang,

Lechao Cheng,

Zhun Zhong,

Si Liu,

Meng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Yu and Li, Hongyu and Huang, Shaofei and Hui, Tianrui and Wang, Yaxiong and Cheng, Lechao and Zhong, Zhun and Liu, Si and Wang, Meng},
  title     = {Parse, Search, and Confirmation: Training-Free Aerial Vision-and-Dialog Navigation with Chain-of-Thought Reasoning and Structured Spatial Memory},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23859--23868},
}
Rethinking Diffusion Model-Based Video Super-Resolution: Leveraging Dense Guidance from Aligned Features: Jingyi Xu,

Meisong Zheng,

Ying Chen,

Minglang Qiao,

Xin Deng,

Mai Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jingyi and Zheng, Meisong and Chen, Ying and Qiao, Minglang and Deng, Xin and Xu, Mai},
  title     = {Rethinking Diffusion Model-Based Video Super-Resolution: Leveraging Dense Guidance from Aligned Features},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38248--38257},
}
Revisiting F-measure Optimization in Multi-Label Classification: A Sampling-based Approach: Zixun Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zixun},
  title     = {Revisiting {F-measure} Optimization in Multi-Label Classification: A Sampling-based Approach},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16845--16854},
}
TANGO: Text-Anchored Guided Optimization for Robust Fine-tuning Vision-Language Models under Label Noise: Tengfei Ma,

Weiran Pan,

Wei Wei; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Tengfei and Pan, Weiran and Wei, Wei},
  title     = {{TANGO}: Text-Anchored Guided Optimization for Robust Fine-tuning Vision-Language Models under Label Noise},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39186--39196},
}
WiTTA-Bench: Benchmarking Test-Time Adaptation for WiFi Sensing: Bing Li,

Qiang Wang,

Junda Lu,

Le Zhang,

Yun Liu,

Ce Zhu,

Wei Cui; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Bing and Wang, Qiang and Lu, Junda and Zhang, Le and Liu, Yun and Zhu, Ce and Cui, Wei},
  title     = {{WiTTA-Bench}: Benchmarking Test-Time Adaptation for {WiFi} Sensing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18461--18470},
}
SGSoft: Learning Fused Semantic-Geometric Features for 3D Shape Correspondence via Template-Guided Soft Signals: Soyeon Yoon,

Chang Wook Seo,

Hyunjung Shim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoon_2026_CVPR,
  author    = {Yoon, Soyeon and Seo, Chang Wook and Shim, Hyunjung},
  title     = {{SGSoft}: Learning Fused Semantic-Geometric Features for {3D} Shape Correspondence via Template-Guided Soft Signals},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7142--7153},
}
Too Vivid to Be Real? Benchmarking and Calibrating Generative Color Fidelity: Zhengyao Fang,

Zexi Jia,

Yijia Zhong,

Pengcheng Luo,

Jinchao Zhang,

Guangming Lu,

Jun Yu,

Wenjie Pei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Zhengyao and Jia, Zexi and Zhong, Yijia and Luo, Pengcheng and Zhang, Jinchao and Lu, Guangming and Yu, Jun and Pei, Wenjie},
  title     = {Too Vivid to Be Real? Benchmarking and Calibrating Generative Color Fidelity},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37258--37267},
}
EgoRoC: Towards Egocentric Robotic Control via Task-Agnostic Visual Alignment: Wei Feng,

Chi Zhang,

Nan Li,

Qian Zhang,

Qi Zhang,

Mingyan Li; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Wei and Zhang, Chi and Li, Nan and Zhang, Qian and Zhang, Qi and Li, Mingyan},
  title     = {{EgoRoC}: Towards Egocentric Robotic Control via Task-Agnostic Visual Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34991--35001},
}
FluoCLIP: Stain-Aware Focus Quality Assessment in Fluorescence Microscopy: Hyejin Park,

Jiwon Yoon,

Sumin Park,

Suree Kim,

Sinae Jang,

Eunsoo Lee,

Dongmin Kang,

Dongbo Min; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Hyejin and Yoon, Jiwon and Park, Sumin and Kim, Suree and Jang, Sinae and Lee, Eunsoo and Kang, Dongmin and Min, Dongbo},
  title     = {{FluoCLIP}: Stain-Aware Focus Quality Assessment in Fluorescence Microscopy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28288--28297},
}
Physics-Guided Multistep Deformation Reversal for Ancient Bamboo Slip Restoration: Qianqian Tang,

Jinchi Zhu,

Xiaolu Zhou,

Yongchao Xu; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Qianqian and Zhu, Jinchi and Zhou, Xiaolu and Xu, Yongchao},
  title     = {Physics-Guided Multistep Deformation Reversal for Ancient Bamboo Slip Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34061--34071},
}
PoseD-Flow: Versatile and Guided Flow Matching Model of Human Pose: Jebastin Nadar,

Simone Foti,

Tolga Birdal; [pdf] [supp]
[bibtex]
@InProceedings{Nadar_2026_CVPR,
  author    = {Nadar, Jebastin and Foti, Simone and Birdal, Tolga},
  title     = {{PoseD-Flow}: Versatile and Guided Flow Matching Model of Human Pose},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21165--21175},
}
Toward Real-world Infrared Image Super-Resolution: A Unified Autoregressive Framework and Benchmark Dataset: Yang Zou,

Jun Ma,

Zhidong Jiao,

Xingyuan Li,

Zhiying Jiang,

Jinyuan Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Yang and Ma, Jun and Jiao, Zhidong and Li, Xingyuan and Jiang, Zhiying and Liu, Jinyuan},
  title     = {Toward Real-world Infrared Image Super-Resolution: A Unified Autoregressive Framework and Benchmark Dataset},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16365--16375},
}
In Pursuit of Pixel Supervision for Visual Pre-training: Lihe Yang,

Shang-Wen Li,

Yang Li,

Xinjie Lei,

Dong Wang,

Abdelrahman Mohamed,

Saining Xie,

Hengshuang Zhao,

Kaiming He,

Hu Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Lihe and Li, Shang-Wen and Li, Yang and Lei, Xinjie and Wang, Dong and Mohamed, Abdelrahman and Xie, Saining and Zhao, Hengshuang and He, Kaiming and Xu, Hu},
  title     = {In Pursuit of Pixel Supervision for Visual Pre-training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31974--31984},
}
Dense Metric Depth Completion from Sparse Direct Time-of-Flight Sensors: Hakyeong Kim,

Ruicheng Wang,

Chengtang Yao,

Jiaolong Yang,

Min H. Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Hakyeong and Wang, Ruicheng and Yao, Chengtang and Yang, Jiaolong and Kim, Min H.},
  title     = {Dense Metric Depth Completion from Sparse Direct Time-of-Flight Sensors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36518--36528},
}
Omni-Supervised Motion Editing: Balancing Change and Invariance through Positive-Negative Learning: Zhenwu Shi,

Jingyu Gong,

Peiwei Wang,

Xingzan Wang,

Tianwen Qian,

Wenxi Li,

Yuan Fang,

Jiao Xie,

Lizhuang Ma,

Shaohui Lin; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Zhenwu and Gong, Jingyu and Wang, Peiwei and Wang, Xingzan and Qian, Tianwen and Li, Wenxi and Fang, Yuan and Xie, Jiao and Ma, Lizhuang and Lin, Shaohui},
  title     = {Omni-Supervised Motion Editing: Balancing Change and Invariance through Positive-Negative Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30595--30606},
}
FabricGen: Microstructure-Aware Woven Fabric Generation: Yingjie Tang,

Di Luo,

Zixiong Wang,

Xiaoli Ling,

Jian Yang,

Beibei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Yingjie and Luo, Di and Wang, Zixiong and Ling, Xiaoli and Yang, Jian and Wang, Beibei},
  title     = {{FabricGen}: Microstructure-Aware Woven Fabric Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34333--34342},
}
OMG-Bench: A New Challenging Benchmark for Skeleton-based Online Micro Hand Gesture Recognition: Haochen Chang,

Pengfei Ren,

Buyuan Zhang,

Da Li,

Tianhao Han,

Haoyang Zhang,

Liang Xie,

Hongbo Chen,

Erwei Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chang_2026_CVPR,
  author    = {Chang, Haochen and Ren, Pengfei and Zhang, Buyuan and Li, Da and Han, Tianhao and Zhang, Haoyang and Xie, Liang and Chen, Hongbo and Yin, Erwei},
  title     = {{OMG-Bench}: A New Challenging Benchmark for Skeleton-based Online Micro Hand Gesture Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7068--7078},
}
FedRE: A Representation Entanglement Framework for Model-Heterogeneous Federated Learning: Yuan Yao,

Lixu Wang,

Jiaqi Wu,

Jin Song,

Simin Chen,

Zehua Wang,

Zijian Tian,

Wei Chen,

Huixia Li,

Xiaoxiao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2026_CVPR,
  author    = {Yao, Yuan and Wang, Lixu and Wu, Jiaqi and Song, Jin and Chen, Simin and Wang, Zehua and Tian, Zijian and Chen, Wei and Li, Huixia and Li, Xiaoxiao},
  title     = {{FedRE}: A Representation Entanglement Framework for Model-Heterogeneous Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39466--39475},
}
Cell-Type Prototype-Informed Neural Network for Gene Expression Estimation from Pathology Images: Kazuya Nishimura,

Ryoma Bise,

Shinnosuke Matsuo,

Haruka Hirose,

Yasuhiro Kojima; [pdf] [arXiv]
[bibtex]
@InProceedings{Nishimura_2026_CVPR,
  author    = {Nishimura, Kazuya and Bise, Ryoma and Matsuo, Shinnosuke and Hirose, Haruka and Kojima, Yasuhiro},
  title     = {Cell-Type Prototype-Informed Neural Network for Gene Expression Estimation from Pathology Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19801--19811},
}
Scale Space Diffusion: Soumik Mukhopadhyay,

Prateksha Udhayanan,

Abhinav Shrivastava; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mukhopadhyay_2026_CVPR,
  author    = {Mukhopadhyay, Soumik and Udhayanan, Prateksha and Shrivastava, Abhinav},
  title     = {Scale Space Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35851--35860},
}
SelecTKD: Selective Token-Weighted Knowledge Distillation for LLMs: Haiduo Huang,

Jiangcheng Song,

Yadong Zhang,

Pengju Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Haiduo and Song, Jiangcheng and Zhang, Yadong and Ren, Pengju},
  title     = {{SelecTKD}: Selective Token-Weighted Knowledge Distillation for {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19287--19297},
}
Prune2Drive: A Plug-and-Play Framework for Accelerating Vision-Language Models in Autonomous Driving: Minhao Xiong,

Zichen Wen,

Zhuangcheng Gu,

Xuyang Liu,

Rui Zhang,

Hengrui Kang,

Jiabing Yang,

Junyuan Zhang,

Weijia Li,

Conghui He,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Minhao and Wen, Zichen and Gu, Zhuangcheng and Liu, Xuyang and Zhang, Rui and Kang, Hengrui and Yang, Jiabing and Zhang, Junyuan and Li, Weijia and He, Conghui and Zhang, Linfeng},
  title     = {{Prune2Drive}: A Plug-and-Play Framework for Accelerating Vision-Language Models in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25215--25224},
}
Reinforcing Structured Chain-of-Thought for Video Understanding: Peiyao Wang,

Haotian Xu,

Noranart Vesdapunt,

Rui Hou,

Jingyi Zhang,

Haibin Ling,

Oleksandr Obiednikov,

Ning Zhou,

Kah Kuen Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Peiyao and Xu, Haotian and Vesdapunt, Noranart and Hou, Rui and Zhang, Jingyi and Ling, Haibin and Obiednikov, Oleksandr and Zhou, Ning and Fu, Kah Kuen},
  title     = {Reinforcing Structured Chain-of-Thought for Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9794--9803},
}
VGGT-360: Geometry-Consistent Zero-Shot Panoramic Depth Estimation: Jiayi Yuan,

Haobo Jiang,

De Wen Soh,

Na Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Jiayi and Jiang, Haobo and Soh, De Wen and Zhao, Na},
  title     = {{VGGT-360}: Geometry-Consistent Zero-Shot Panoramic Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19874--19883},
}
LitePT: Lighter Yet Stronger Point Transformer: Yuanwen Yue,

Damien Robert,

Jianyuan Wang,

Sunghwan Hong,

Jan Dirk Wegner,

Christian Rupprecht,

Konrad Schindler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yue_2026_CVPR,
  author    = {Yue, Yuanwen and Robert, Damien and Wang, Jianyuan and Hong, Sunghwan and Wegner, Jan Dirk and Rupprecht, Christian and Schindler, Konrad},
  title     = {{LitePT}: Lighter Yet Stronger Point Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24173--24184},
}
Dynamic-eDiTor: Training-Free Text-Driven 4D Scene Editing with Multimodal Diffusion Transformer: Dong In Lee,

Hyungjun Doh,

Seunggeun Chi,

Runlin Duan,

Sangpil Kim,

Karthik Ramani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Dong In and Doh, Hyungjun and Chi, Seunggeun and Duan, Runlin and Kim, Sangpil and Ramani, Karthik},
  title     = {{Dynamic-eDiTor}: Training-Free Text-Driven {4D} Scene Editing with Multimodal Diffusion Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1187--1197},
}
VisRes Bench: On Evaluating the Visual Reasoning Capabilities of VLMs: Brigitta Malagurski Törtei,

Yasser Dahou,

Ngoc Dung Huynh,

Wamiq Reyaz Para,

Phúc H. Lê Khac,

Ankit Singh,

Sofian Chaybouti,

Sanath Narayan; [pdf] [supp]
[bibtex]
@InProceedings{Tortei_2026_CVPR,
  author    = {T{\"o}rtei, Brigitta Malagurski and Dahou, Yasser and Huynh, Ngoc Dung and Para, Wamiq Reyaz and Khac, Ph{\'u}c H. L{\^e} and Singh, Ankit and Chaybouti, Sofian and Narayan, Sanath},
  title     = {{VisRes Bench}: On Evaluating the Visual Reasoning Capabilities of {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33185--33195},
}
Guiding Diffusion-based Reconstruction with Contrastive Signals for Balanced Visual Representation: Boyu Han,

Qianqian Xu,

Shilong Bao,

Zhiyong Yang,

Ruochen Cui,

Xilin Zhao,

Qingming Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Boyu and Xu, Qianqian and Bao, Shilong and Yang, Zhiyong and Cui, Ruochen and Zhao, Xilin and Huang, Qingming},
  title     = {Guiding Diffusion-based Reconstruction with Contrastive Signals for Balanced Visual Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2369--2380},
}
Domain-Skewed Federated Learning with Feature Decoupling and Calibration: Huan Wang,

Jun Shen,

Jun Yan,

Guansong Pang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Huan and Shen, Jun and Yan, Jun and Pang, Guansong},
  title     = {Domain-Skewed Federated Learning with Feature Decoupling and Calibration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17484--17493},
}
MoBind: Motion Binding for Fine-Grained IMU-Video Pose Alignment: Duc Duy Nguyen,

Tat-Jun Chin,

Minh Hoai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Duc Duy and Chin, Tat-Jun and Hoai, Minh},
  title     = {{MoBind}: Motion Binding for Fine-Grained {IMU}-Video Pose Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22123--22132},
}
Learned Image Compression via Sparse Attention and Adaptive Frequency: Huidong Ma,

Xinyan Shi,

Hui Sun,

Xiaofei Yue,

Xiaoguang Liu,

Gang Wang,

Wentong Cai; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Huidong and Shi, Xinyan and Sun, Hui and Yue, Xiaofei and Liu, Xiaoguang and Wang, Gang and Cai, Wentong},
  title     = {Learned Image Compression via Sparse Attention and Adaptive Frequency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41278--41287},
}
Learning to Refuse: Refusal-Aware Reinforcement Fine-Tuning for Hard-Irrelevant Queries in Video Temporal Grounding: Jin-Seop Lee,

SungJoon Lee,

SeongJun Jung,

Boyang Li,

Jee-Hyong Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Jin-Seop and Lee, SungJoon and Jung, SeongJun and Li, Boyang and Lee, Jee-Hyong},
  title     = {Learning to Refuse: Refusal-Aware Reinforcement Fine-Tuning for Hard-Irrelevant Queries in Video Temporal Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10397--10407},
}
FedAdamom: Adaptive Momentum for Improved Generalization in Federated Optimization: Wenjie Hou,

Tianxiang Chen,

Feng Wang,

Tiantong Wu,

Zhiming Zheng,

Shaoting Tang,

Wei Yang Bryan Lim; [pdf] [supp]
[bibtex]
@InProceedings{Hou_2026_CVPR,
  author    = {Hou, Wenjie and Chen, Tianxiang and Wang, Feng and Wu, Tiantong and Zheng, Zhiming and Tang, Shaoting and Lim, Wei Yang Bryan},
  title     = {{FedAdamom}: Adaptive Momentum for Improved Generalization in Federated Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36354--36364},
}
Beyond the Static World: Continual Category Discovery under Visual Drift: Wei Feng,

Yiwen Jiang,

Sijin Zhou,

Zongyuan Ge; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Wei and Jiang, Yiwen and Zhou, Sijin and Ge, Zongyuan},
  title     = {Beyond the Static World: Continual Category Discovery under Visual Drift},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25032--25042},
}
SEATrack: Simple, Efficient, and Adaptive Multimodal Tracker: Junbin Su,

Ziteng Xue,

Shihui Zhang,

Kun Chen,

Weiming Hu,

Zhipeng Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Su_2026_CVPR,
  author    = {Su, Junbin and Xue, Ziteng and Zhang, Shihui and Chen, Kun and Hu, Weiming and Zhang, Zhipeng},
  title     = {{SEATrack}: Simple, Efficient, and Adaptive Multimodal Tracker},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28679--28689},
}
Rethinking Concept Bottleneck Models: From Pitfalls to Solutions: Merve Tapli,

Quentin Bouniot,

Wolfgang Stammer,

Zeynep Akata,

Emre Akbas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tapli_2026_CVPR,
  author    = {Tapli, Merve and Bouniot, Quentin and Stammer, Wolfgang and Akata, Zeynep and Akbas, Emre},
  title     = {Rethinking Concept Bottleneck Models: From Pitfalls to Solutions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9901--9910},
}
WildRayZer: Self-supervised Large View Synthesis in Dynamic Environments: Xuweiyi Chen,

Wentao Zhou,

Zezhou Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xuweiyi and Zhou, Wentao and Cheng, Zezhou},
  title     = {{WildRayZer}: Self-supervised Large View Synthesis in Dynamic Environments},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1252--1264},
}
Attribution as Retrieval: Model-Agnostic AI-Generated Image Attribution: Hongsong Wang,

Renxi Cheng,

Chaolei Han,

Jie Gui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Hongsong and Cheng, Renxi and Han, Chaolei and Gui, Jie},
  title     = {Attribution as Retrieval: Model-Agnostic {AI}-Generated Image Attribution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14062--14072},
}
Dual-level Adapter Boosting Prompt-free Curvilinear Structure Segmentation: Kai Zhu,

Li Chen,

Jun Cheng; [pdf]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Kai and Chen, Li and Cheng, Jun},
  title     = {Dual-level Adapter Boosting Prompt-free Curvilinear Structure Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36300--36310},
}
Visual Personalization Turing Test: Rameen Abdal,

James Burgess,

Sergey Tulyakov,

Kuan-Chieh Jackson Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Abdal_2026_CVPR,
  author    = {Abdal, Rameen and Burgess, James and Tulyakov, Sergey and Wang, Kuan-Chieh Jackson},
  title     = {Visual Personalization {Turing} Test},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14789--14799},
}
Spectrally Distilled Representations Aligned with Instruction-Augmented LLMs for Satellite Imagery: Minh Kha Do,

Wei Xiang,

Kang Han,

Di Wu,

Khoa Phan,

Yi-Ping Phoebe Chen,

Gaowen Liu,

Ramana Rao Kompella; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Do_2026_CVPR,
  author    = {Do, Minh Kha and Xiang, Wei and Han, Kang and Wu, Di and Phan, Khoa and Chen, Yi-Ping Phoebe and Liu, Gaowen and Kompella, Ramana Rao},
  title     = {Spectrally Distilled Representations Aligned with Instruction-Augmented {LLMs} for Satellite Imagery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6453--6463},
}
Multimodal Distribution Matching for Vision-Language Dataset Distillation: Jongoh Jeong,

Hoyong Kwon,

Minseok Kim,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Jongoh and Kwon, Hoyong and Kim, Minseok and Yoon, Kuk-Jin},
  title     = {Multimodal Distribution Matching for Vision-Language Dataset Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23072--23082},
}
Anchoring the Mind of Multimodal Reasoners: Cognitive Bias as a Vector for Jailbreak Attacks: Linhua Cong,

Bingrui Sima,

Kun He; [pdf] [supp]
[bibtex]
@InProceedings{Cong_2026_CVPR,
  author    = {Cong, Linhua and Sima, Bingrui and He, Kun},
  title     = {Anchoring the Mind of Multimodal Reasoners: Cognitive Bias as a Vector for Jailbreak Attacks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36926--36935},
}
HAMMER: Harnessing MLLMs via Cross-Modal Integration for Intention-Driven 3D Affordance Grounding: Lei Yao,

Yong Chen,

Yuejiao Su,

Yi Wang,

Moyun Liu,

Lap-Pui Chau; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2026_CVPR,
  author    = {Yao, Lei and Chen, Yong and Su, Yuejiao and Wang, Yi and Liu, Moyun and Chau, Lap-Pui},
  title     = {{HAMMER}: Harnessing {MLLMs} via Cross-Modal Integration for Intention-Driven {3D} Affordance Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23805--23815},
}
Localizing, Structuring, and Rendering: Bridging 3D and 2D Vision-Language-Action Models for Robotic Manipulation: Yunlong Zhao,

Xiaoheng Deng,

Yichao Cao,

Yi Chen,

Xiangjian He,

Shan You,

Shuo Yang,

Lei Fan,

Fei Wang,

Xiu Su; [pdf]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yunlong and Deng, Xiaoheng and Cao, Yichao and Chen, Yi and He, Xiangjian and You, Shan and Yang, Shuo and Fan, Lei and Wang, Fei and Su, Xiu},
  title     = {Localizing, Structuring, and Rendering: Bridging {3D} and {2D} Vision-Language-Action Models for Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20812--20822},
}
Towards Knowledge-augmented Bayesian Deep Learning For Computer Vision: Wang Ma,

Hanjing Wang,

Yufei Zhang,

Darsha Udayanga,

Qiang Ji; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Wang and Wang, Hanjing and Zhang, Yufei and Udayanga, Darsha and Ji, Qiang},
  title     = {Towards Knowledge-augmented {Bayesian} Deep Learning For Computer Vision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6136--6146},
}
Masked-Diffusion Autoencoders for 3D Medical Vision Representation Learning: Jiachen Tu,

Guanghui Qin,

Theodore Zhengde Zhao,

Jeya Maria Jose Valanarasu,

Sheng Zhang,

Tristan Naumann,

Fan Lam,

Sheng Wang,

Hoifung Poon; [pdf] [supp]
[bibtex]
@InProceedings{Tu_2026_CVPR,
  author    = {Tu, Jiachen and Qin, Guanghui and Zhao, Theodore Zhengde and Valanarasu, Jeya Maria Jose and Zhang, Sheng and Naumann, Tristan and Lam, Fan and Wang, Sheng and Poon, Hoifung},
  title     = {Masked-Diffusion Autoencoders for {3D} Medical Vision Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22804--22815},
}
BEV-CAR: Enhancing Monocular Bird's Eye View Segmentation with Context-Aware Rasterization: Yixin Xiong,

Ke Wang,

Tongtong Cheng,

Chunhui Liu,

Kai Liu; [pdf]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Yixin and Wang, Ke and Cheng, Tongtong and Liu, Chunhui and Liu, Kai},
  title     = {{BEV-CAR}: Enhancing Monocular Bird's Eye View Segmentation with Context-Aware Rasterization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39869--39878},
}
Spectrum from Defocus: Fast Spectral Imaging with Chromatic Focal Stack: M. Kerem Aydin,

Yi-Chun Hung,

Jaclyn Pytlarz,

Qi Guo,

Emma Alexander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Aydin_2026_CVPR,
  author    = {Aydin, M. Kerem and Hung, Yi-Chun and Pytlarz, Jaclyn and Guo, Qi and Alexander, Emma},
  title     = {Spectrum from Defocus: Fast Spectral Imaging with Chromatic Focal Stack},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {220--230},
}
IF-Prune: Information-Flow Guided Token Pruning for Efficient Vision-Language Models: Guohao Sun,

Yufei Wang,

Sizhuo Ma,

Yuege Xie,

Yuting Cheng,

Zhiqiang Tao,

Jian Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Guohao and Wang, Yufei and Ma, Sizhuo and Xie, Yuege and Cheng, Yuting and Tao, Zhiqiang and Wang, Jian},
  title     = {{IF-Prune}: Information-Flow Guided Token Pruning for Efficient Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3522--3531},
}
Curvature-Aware Zeroth-Order Optimization for Memory-Efficient Test-Time Adaptation: Junming Zhang,

Shuyu Yin,

Peilin Liu,

Rendong Ying,

Fei Wen; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Junming and Yin, Shuyu and Liu, Peilin and Ying, Rendong and Wen, Fei},
  title     = {Curvature-Aware Zeroth-Order Optimization for Memory-Efficient Test-Time Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {836--846},
}
Hidden Monotonicity: Explaining Deep Neural Networks via their DC Decomposition: Jakob Paul Zimmermann,

Georg Loho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zimmermann_2026_CVPR,
  author    = {Zimmermann, Jakob Paul and Loho, Georg},
  title     = {Hidden Monotonicity: Explaining Deep Neural Networks via their {DC} Decomposition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24107--24117},
}
VL-RouterBench: A Benchmark for Vision-Language Model Routing: Zhehao Huang,

Baijiong Lin,

Jingyuan Zhang,

Jingying Wang,

Yuhang Liu,

Ning Lu,

Tao Li,

Xiaolin Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zhehao and Lin, Baijiong and Zhang, Jingyuan and Wang, Jingying and Liu, Yuhang and Lu, Ning and Li, Tao and Huang, Xiaolin},
  title     = {{VL-RouterBench}: A Benchmark for Vision-Language Model Routing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9512--9523},
}
RevINN: An End-to-End Invertible Neural Network for Reversible Adversarial Examples Generation: Jielun Huang,

Chi-Man Pun,

Guoheng Huang; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Jielun and Pun, Chi-Man and Huang, Guoheng},
  title     = {{RevINN}: An End-to-End Invertible Neural Network for Reversible Adversarial Examples Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6601--6610},
}
BiomedCCPL: Causal Conditional Prompt Learning for Biomedical Vision-Language Models: Xueliang Cui,

Juncai Zhang,

Jiacheng Hou,

Dan Lu,

Hao Zhang,

Ruxin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Xueliang and Zhang, Juncai and Hou, Jiacheng and Lu, Dan and Zhang, Hao and Wang, Ruxin},
  title     = {{BiomedCCPL}: Causal Conditional Prompt Learning for Biomedical Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40812--40821},
}
Neural Gabor Splatting: Enhanced Gaussian Splatting with Neural Gabor for High-frequency Surface Reconstruction: Haato Watanabe,

Nobuyuki Umetani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Watanabe_2026_CVPR,
  author    = {Watanabe, Haato and Umetani, Nobuyuki},
  title     = {Neural {Gabor} Splatting: Enhanced {Gaussian} Splatting with Neural {Gabor} for High-frequency Surface Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4932--4941},
}
RaUF: Learning the Spatial Uncertainty Field of Radar: Shengpeng Wang,

Kuangyu Wang,

Wei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shengpeng and Wang, Kuangyu and Wang, Wei},
  title     = {{RaUF}: Learning the Spatial Uncertainty Field of Radar},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42474--42483},
}
Attention-aware Inference Optimizations for Large Vision-Language Models with Memory-efficient Decoding: Fatih Ilhan,

Gaowen Liu,

Ramana Rao Kompella,

Selim Furkan Tekin,

Tiansheng Huang,

Zachary Yahn,

Yichang Xu,

Ling Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ilhan_2026_CVPR,
  author    = {Ilhan, Fatih and Liu, Gaowen and Kompella, Ramana Rao and Tekin, Selim Furkan and Huang, Tiansheng and Yahn, Zachary and Xu, Yichang and Liu, Ling},
  title     = {Attention-aware Inference Optimizations for Large Vision-Language Models with Memory-efficient Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10482--10491},
}
RoadGIE: Towards A Global-Scale Aerial Benchmark for Generalizable Interactive Road Extraction: Chenxu Peng,

Chenxu Wang,

Yimian Dai,

Yongxiang Liu,

Ming-Ming Cheng,

Xiang Li; [pdf] [supp]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Chenxu and Wang, Chenxu and Dai, Yimian and Liu, Yongxiang and Cheng, Ming-Ming and Li, Xiang},
  title     = {{RoadGIE}: Towards A Global-Scale Aerial Benchmark for Generalizable Interactive Road Extraction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13285--13295},
}
UniGeoRS: A Unified Benchmark for Tri-view Geo-Localization: Xiao Liang,

Huaizhi Tang,

Feiyang Zhang,

Shiji Yuan,

Chun Hu,

Dezhi Zheng,

Kang Ma; [pdf]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Xiao and Tang, Huaizhi and Zhang, Feiyang and Yuan, Shiji and Hu, Chun and Zheng, Dezhi and Ma, Kang},
  title     = {{UniGeoRS}: A Unified Benchmark for Tri-view Geo-Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5399--5408},
}
AHS: Adaptive Head Synthesis via Synthetic Data Augmentations: Taewoong Kang,

Hyojin Jang,

Sohyun Jeong,

Seunggi Moon,

Gihwi Kim,

Hoon Jin Jung,

Jaegul Choo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Taewoong and Jang, Hyojin and Jeong, Sohyun and Moon, Seunggi and Kim, Gihwi and Jung, Hoon Jin and Choo, Jaegul},
  title     = {{AHS}: Adaptive Head Synthesis via Synthetic Data Augmentations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2125--2135},
}
Your Dissimilarities Define You: Complementary Learning Exploiting Class Diversities: Dimitrios Katsikas,

Nikolaos Passalis,

Anastasios Tefas; [pdf] [supp]
[bibtex]
@InProceedings{Katsikas_2026_CVPR,
  author    = {Katsikas, Dimitrios and Passalis, Nikolaos and Tefas, Anastasios},
  title     = {Your Dissimilarities Define You: Complementary Learning Exploiting Class Diversities},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10512--10521},
}
DriveVLN: Towards Mapless Vision-and-Language Navigation in Autonomous Driving: Dongqian Guo,

Haoran Wei,

Wencheng Han,

Runzhou Tao,

Zhongying Qiu,

Jianfei Yang,

Jianbing Shen; [pdf]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Dongqian and Wei, Haoran and Han, Wencheng and Tao, Runzhou and Qiu, Zhongying and Yang, Jianfei and Shen, Jianbing},
  title     = {{DriveVLN}: Towards Mapless Vision-and-Language Navigation in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25174--25183},
}
Cross-modal Fuzzy Alignment Network for Text-Aerial Person Retrieval and A Large-scale Benchmark: Yifei Deng,

Chenglong Li,

Yuyang Zhang,

Guyue Hu,

Jin Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Yifei and Li, Chenglong and Zhang, Yuyang and Hu, Guyue and Tang, Jin},
  title     = {Cross-modal Fuzzy Alignment Network for Text-Aerial Person Retrieval and A Large-scale Benchmark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38732--38741},
}
Asynchronous Temporal Modeling with Two-Agent Framework for Streaming Dense Video Captioning: Yolo Y. Tang,

Chao Huang,

Susan Liang,

Jing Bi,

Yicheng Wang,

Daiki Shimada,

Chenliang Xu; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Yolo Y. and Huang, Chao and Liang, Susan and Bi, Jing and Wang, Yicheng and Shimada, Daiki and Xu, Chenliang},
  title     = {Asynchronous Temporal Modeling with Two-Agent Framework for Streaming Dense Video Captioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2799--2810},
}
SkillSight: Efficient First-Person Skill Assessment with Gaze: Chi Hsuan Wu,

Ashutosh Kumar,

Kristen Grauman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Chi Hsuan and Kumar, Ashutosh and Grauman, Kristen},
  title     = {{SkillSight}: Efficient First-Person Skill Assessment with Gaze},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38891--38903},
}
TGT: Text-Grounded Trajectories for Locally Controlled Video Generation: Guofeng Zhang,

Angtian Wang,

Jacob Zhiyuan Fang,

Liming Jiang,

Haotian Yang,

Bo Liu,

Yiding Yang,

Guang Chen,

Longyin Wen,

Alan Yuille,

Chongyang Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Guofeng and Wang, Angtian and Fang, Jacob Zhiyuan and Jiang, Liming and Yang, Haotian and Liu, Bo and Yang, Yiding and Chen, Guang and Wen, Longyin and Yuille, Alan and Ma, Chongyang},
  title     = {{TGT}: Text-Grounded Trajectories for Locally Controlled Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22028--22037},
}
Training One Model to Master Cross-Level Agentic Actions via Reinforcement Learning: Kaichen He,

Zihao Wang,

Muyao Li,

Anji Liu,

Yitao Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Kaichen and Wang, Zihao and Li, Muyao and Liu, Anji and Liang, Yitao},
  title     = {Training One Model to Master Cross-Level Agentic Actions via Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {724--734},
}
Mind the Hitch: Dynamic Calibration and Articulated Perception for Autonomous Trucks: Morui Zhu,

Yongqi Zhu,

Song Fu,

Qing Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Morui and Zhu, Yongqi and Fu, Song and Yang, Qing},
  title     = {Mind the Hitch: Dynamic Calibration and Articulated Perception for Autonomous Trucks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10668--10677},
}
Beyond Text: Visual Description Assembly by Probabilistic Model for CLIP-based Weakly Supervised Semantic Segmentation: Xianglin Qiu,

Jian Wang,

Xiaolei Wang,

Zhen Zhang,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Xianglin and Wang, Jian and Wang, Xiaolei and Zhang, Zhen and Xiao, Jimin},
  title     = {Beyond Text: Visual Description Assembly by Probabilistic Model for {CLIP}-based Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6346--6356},
}
Molmo2: Open Weights and Data for Vision-Language Models with Video Understanding and Grounding: Christopher Clark,

Jieyu Zhang,

Zixian Ma,

Jae Sung Park,

Rohun Tripathi,

Sangho Lee,

Mohammadreza Salehi,

Jason Ren,

Chris Dongjoo Kim,

Yinuo Yang,

Vincent Shao,

Yue Yang,

Weikai Huang,

Ziqi Gao,

Taira Anderson,

Jianrui Zhang,

Jitesh Jain,

George Stoica,

Ali Farhadi,

Ranjay Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Clark_2026_CVPR,
  author    = {Clark, Christopher and Zhang, Jieyu and Ma, Zixian and Park, Jae Sung and Tripathi, Rohun and Lee, Sangho and Salehi, Mohammadreza and Ren, Jason and Kim, Chris Dongjoo and Yang, Yinuo and Shao, Vincent and Yang, Yue and Huang, Weikai and Gao, Ziqi and Anderson, Taira and Zhang, Jianrui and Jain, Jitesh and Stoica, George and Farhadi, Ali and Krishna, Ranjay},
  title     = {Molmo2: Open Weights and Data for Vision-Language Models with Video Understanding and Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28652--28668},
}
R4-CGQA: Retrieval-based Vision Language Models for Computer Graphics Image Quality Assessment: Zhuangzi Li,

Jian Jin,

Shilv Cai,

Weisi Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhuangzi and Jin, Jian and Cai, Shilv and Lin, Weisi},
  title     = {{R4-CGQA}: Retrieval-based Vision Language Models for Computer Graphics Image Quality Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9468--9477},
}
ARGUS: Defending Against Multimodal Indirect Prompt Injection via Steering Instruction-Following Behavior: Weikai Lu,

Ziqian Zeng,

Kehua Zhang,

Haoran Li,

Huiping Zhuang,

Ruidong Wang,

Cen Chen,

Hao Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Weikai and Zeng, Ziqian and Zhang, Kehua and Li, Haoran and Zhuang, Huiping and Wang, Ruidong and Chen, Cen and Peng, Hao},
  title     = {{ARGUS}: Defending Against Multimodal Indirect Prompt Injection via Steering Instruction-Following Behavior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31--40},
}
MonoVLM: Monocular 3D Visual Grounding with Vision Language Models: Huaizhi Qu,

Hossein Nourkhiz Mahjoub,

Vaishnav Tadiparthi,

Kwonjoon Lee,

Tianlong Chen; [pdf]
[bibtex]
@InProceedings{Qu_2026_CVPR,
  author    = {Qu, Huaizhi and Mahjoub, Hossein Nourkhiz and Tadiparthi, Vaishnav and Lee, Kwonjoon and Chen, Tianlong},
  title     = {{MonoVLM}: Monocular {3D} Visual Grounding with Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30986--30996},
}
SLVMEval: Synthetic Meta Evaluation Benchmark for Text-to-Long Video Generation: Ryosuke Matsuda,

Keito Kudo,

Haruto Yoshida,

Nobuyuki Shimizu,

Jun Suzuki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Matsuda_2026_CVPR,
  author    = {Matsuda, Ryosuke and Kudo, Keito and Yoshida, Haruto and Shimizu, Nobuyuki and Suzuki, Jun},
  title     = {{SLVMEval}: Synthetic Meta Evaluation Benchmark for Text-to-Long Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7784--7794},
}
Discovering Adaptive Task Dependencies for Efficient Multi-Task Representation Compression: Zhimeng Huang,

Rongao Yuan,

Junlong Gao,

Qi Mao,

Siwei Ma,

Wen Gao,

Chuanmin Jia; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zhimeng and Yuan, Rongao and Gao, Junlong and Mao, Qi and Ma, Siwei and Gao, Wen and Jia, Chuanmin},
  title     = {Discovering Adaptive Task Dependencies for Efficient Multi-Task Representation Compression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5326--5336},
}
Single-Round Scalable Analytic Federated Learning: Alan T. L. Bacellar,

Mustafa Munir,

Felipe M. G. França,

Priscila M. V. Lima,

Radu Marculescu,

Lizy K. John; [pdf]
[bibtex]
@InProceedings{Bacellar_2026_CVPR,
  author    = {Bacellar, Alan T. L. and Munir, Mustafa and Fran{\c{c}}a, Felipe M. G. and Lima, Priscila M. V. and Marculescu, Radu and John, Lizy K.},
  title     = {Single-Round Scalable Analytic Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39445--39454},
}
Lynx: Towards High-Fidelity Personalized Video Generation: Shen Sang,

Tiancheng Zhi,

Tianpei Gu,

Jing Liu,

Linjie Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sang_2026_CVPR,
  author    = {Sang, Shen and Zhi, Tiancheng and Gu, Tianpei and Liu, Jing and Luo, Linjie},
  title     = {Lynx: Towards High-Fidelity Personalized Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9192--9202},
}
HierEdit: Region-Aware Hierarchical Diffusion for Efficient High-Resolution Editing: Yuyao Zhang,

Alexander Huang-Menders,

Yu-Wing Tai; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yuyao and Huang-Menders, Alexander and Tai, Yu-Wing},
  title     = {{HierEdit}: Region-Aware Hierarchical Diffusion for Efficient High-Resolution Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43546--43557},
}
BluRef: Unsupervised Image Deblurring with Dense-Matching References: Bang-Dang Pham,

Anh Tran,

Cuong Pham,

Minh Hoai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pham_2026_CVPR,
  author    = {Pham, Bang-Dang and Tran, Anh and Pham, Cuong and Hoai, Minh},
  title     = {{BluRef}: Unsupervised Image Deblurring with Dense-Matching References},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37445--37454},
}
Talk2Move: Reinforcement Learning for Text-Instructed Object-Level Geometric Transformation in Scenes: Jing Tan,

Zhaoyang Zhang,

Yantao Shen,

Jiarui Cai,

Shuo Yang,

Jiajun Wu,

Wei Xia,

Zhuowen Tu,

Stefano Soatto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Jing and Zhang, Zhaoyang and Shen, Yantao and Cai, Jiarui and Yang, Shuo and Wu, Jiajun and Xia, Wei and Tu, Zhuowen and Soatto, Stefano},
  title     = {{Talk2Move}: Reinforcement Learning for Text-Instructed Object-Level Geometric Transformation in Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14735--14745},
}
Scaling Multi-Identity Consistency for Image Customization via Multi-to-Multi Matching Paradigm: Yufeng Cheng,

Wenxu Wu,

Shaojin Wu,

Mengqi Huang,

Fei Ding,

Qian He; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Yufeng and Wu, Wenxu and Wu, Shaojin and Huang, Mengqi and Ding, Fei and He, Qian},
  title     = {Scaling Multi-Identity Consistency for Image Customization via Multi-to-Multi Matching Paradigm},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1906--1916},
}
Scaling Instruction-Based Video Editing with a High-Quality Synthetic Dataset: Qingyan Bai,

Qiuyu Wang,

Hao Ouyang,

Yue Yu,

Hanlin Wang,

Wen Wang,

Ka Leong Cheng,

Shuailei Ma,

Yanhong Zeng,

Zichen Liu,

Yinghao Xu,

Yujun Shen,

Qifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Qingyan and Wang, Qiuyu and Ouyang, Hao and Yu, Yue and Wang, Hanlin and Wang, Wen and Cheng, Ka Leong and Ma, Shuailei and Zeng, Yanhong and Liu, Zichen and Xu, Yinghao and Shen, Yujun and Chen, Qifeng},
  title     = {Scaling Instruction-Based Video Editing with a High-Quality Synthetic Dataset},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37971--37981},
}
Linear Fundamental Matrix Estimation from 7 or 5 Points: Taci Ata Kucukpinar,

Juan Mogollon,

Joshua Fraser,

Timothy Duff,

Kannappan Palaniappan; [pdf] [supp]
[bibtex]
@InProceedings{Kucukpinar_2026_CVPR,
  author    = {Kucukpinar, Taci Ata and Mogollon, Juan and Fraser, Joshua and Duff, Timothy and Palaniappan, Kannappan},
  title     = {Linear Fundamental Matrix Estimation from 7 or 5 Points},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21464--21473},
}
Training-free Motion Factorization for Compositional Video Generation: Zixuan Wang,

Ziqin Zhou,

Feng Chen,

Duo Peng,

Yixin Hu,

Changsheng Li,

Yinjie Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zixuan and Zhou, Ziqin and Chen, Feng and Peng, Duo and Hu, Yixin and Li, Changsheng and Lei, Yinjie},
  title     = {Training-free Motion Factorization for Compositional Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23139--23149},
}
Towards Human-Imperceptible Backdoor Attacks on Text-to-Image Diffusion Models: Yiming Wu,

Chenghao Chen,

Changkun Wu,

Chong Fu,

Biru Zhu,

Zhenyu Wen,

Zhen Hong; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yiming and Chen, Chenghao and Wu, Changkun and Fu, Chong and Zhu, Biru and Wen, Zhenyu and Hong, Zhen},
  title     = {Towards Human-Imperceptible Backdoor Attacks on Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1503--1512},
}
Cluster-Wise Spatio-Temporal Masking for Efficient Video-Language Pretraining: Weijun Zhuang,

Yuqing Huang,

Weikang Meng,

Xin Li,

Ming Liu,

Xiaopeng Hong,

Yaowei Wang,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuang_2026_CVPR,
  author    = {Zhuang, Weijun and Huang, Yuqing and Meng, Weikang and Li, Xin and Liu, Ming and Hong, Xiaopeng and Wang, Yaowei and Zuo, Wangmeng},
  title     = {Cluster-Wise Spatio-Temporal Masking for Efficient Video-Language Pretraining},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39197--39207},
}
Selection-as-Nonlinearity: Bridging Attention and Activation via a Joint Game-Decision Lens for Interpretable, Discriminative Visual Representations: Sudong Cai,

Shuai Yuan,

Bingzhi Chen,

Rui Mao,

Bing Wang; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Sudong and Yuan, Shuai and Chen, Bingzhi and Mao, Rui and Wang, Bing},
  title     = {Selection-as-Nonlinearity: Bridging Attention and Activation via a Joint Game-Decision Lens for Interpretable, Discriminative Visual Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11621--11631},
}
Decouple Your Discovery and Memory in Continual Generalized Category Discovery: Jiawei Yu,

Zijian Gao,

Xingxing Zhang,

Xuan Liu,

Huaimin Wang,

Kele Xu; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Jiawei and Gao, Zijian and Zhang, Xingxing and Liu, Xuan and Wang, Huaimin and Xu, Kele},
  title     = {Decouple Your Discovery and Memory in Continual Generalized Category Discovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25022--25031},
}
Let Your Image Move with Your Motion! -- Implicit Multi-Object Multi-Motion Transfer: Yuze Li,

Dong Gong,

Xiao Cao,

Junchao Yuan,

Dongsheng Li,

Lei Zhou,

Yun Sing Koh,

Cheng Yan,

Xinyu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yuze and Gong, Dong and Cao, Xiao and Yuan, Junchao and Li, Dongsheng and Zhou, Lei and Koh, Yun Sing and Yan, Cheng and Zhang, Xinyu},
  title     = {Let Your Image Move with Your Motion! -- Implicit Multi-Object Multi-Motion Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11207--11217},
}
Synthesizing Visual Concepts as Vision-Language Programs: Antonia Wüst,

Wolfgang Stammer,

Hikaru Shindo,

Lukas Helff,

Devendra Singh Dhami,

Kristian Kersting; [pdf] [supp]
[bibtex]
@InProceedings{Wust_2026_CVPR,
  author    = {W{\"u}st, Antonia and Stammer, Wolfgang and Shindo, Hikaru and Helff, Lukas and Dhami, Devendra Singh and Kersting, Kristian},
  title     = {Synthesizing Visual Concepts as Vision-Language Programs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17346--17356},
}
FG-Portrait: 3D Flow Guided Editable Portrait Animation: Yating Xu,

Yunqi Miao,

Evangelos Ververas,

Jiankang Deng,

Jifei Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yating and Miao, Yunqi and Ververas, Evangelos and Deng, Jiankang and Song, Jifei},
  title     = {{FG-Portrait}: {3D} Flow Guided Editable Portrait Animation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32947--32956},
}
Learning Scene Coordinate Reconstruction from Unposed Images via Pose Graph Optimization: Tze Ho Elden Tse,

Jizong Peng,

Angela Yao; [pdf] [supp]
[bibtex]
@InProceedings{Tse_2026_CVPR,
  author    = {Tse, Tze Ho Elden and Peng, Jizong and Yao, Angela},
  title     = {Learning Scene Coordinate Reconstruction from Unposed Images via Pose Graph Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21816--21825},
}
Sky2Ground: A Benchmark for Site Modeling under Varying Altitude: Zengyan Wang,

Sirshapan Mitra,

Rajat Modi,

Hui Lim,

Yogesh Rawat; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zengyan and Mitra, Sirshapan and Modi, Rajat and Lim, Hui and Rawat, Yogesh},
  title     = {{Sky2Ground}: A Benchmark for Site Modeling under Varying Altitude},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12227--12236},
}
Follow the Saliency: Supervised Saliency for Retrieval-augmented Dense Video Captioning: Seung hee Choi,

MinJu Jeon,

Hyunwoo Oh,

Jihwan Lee,

Dong-Jin Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Seung hee and Jeon, MinJu and Oh, Hyunwoo and Lee, Jihwan and Kim, Dong-Jin},
  title     = {Follow the Saliency: Supervised Saliency for Retrieval-augmented Dense Video Captioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32808--32817},
}
FORCE: Transferable Visual Jailbreaking Attacks via Feature Over-Reliance CorrEction: Runqi Lin,

Alasdair Paren,

Suqin Yuan,

Muyang Li,

Philip Torr,

Adel Bibi,

Tongliang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Runqi and Paren, Alasdair and Yuan, Suqin and Li, Muyang and Torr, Philip and Bibi, Adel and Liu, Tongliang},
  title     = {{FORCE}: Transferable Visual Jailbreaking Attacks via Feature Over-Reliance {CorrEction}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8610--8620},
}
Cross-domain Dual-stream Feature Disentanglement for Brain Disorder Prediction with Sparsely Labeled PET: Huabin Wang,

Xinyu Chen,

Yuan Zhou,

Fei Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Huabin and Chen, Xinyu and Zhou, Yuan and Liu, Fei},
  title     = {Cross-domain Dual-stream Feature Disentanglement for Brain Disorder Prediction with Sparsely Labeled {PET}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32037--32046},
}
Focal-General Diffusion Model with Semantic Consistent Guidance for Sign Language Production: Yiheng Yu,

Sheng Liu,

Yuan Feng,

Zhelun Jin,

Yining Jiang,

Min Xu; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Yiheng and Liu, Sheng and Feng, Yuan and Jin, Zhelun and Jiang, Yining and Xu, Min},
  title     = {Focal-General Diffusion Model with Semantic Consistent Guidance for Sign Language Production},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35915--35925},
}
MatchMask: Mask-Centric Generative Data Augmentation for Label-Scarce Semantic Segmentation: Yuqi Lin,

Hao Zhang,

Wenqi Shao,

Shiqu Liu,

Zhihong Gu,

Wenxiao Wang,

Xiaofei He,

Kaipeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Yuqi and Zhang, Hao and Shao, Wenqi and Liu, Shiqu and Gu, Zhihong and Wang, Wenxiao and He, Xiaofei and Zhang, Kaipeng},
  title     = {{MatchMask}: Mask-Centric Generative Data Augmentation for Label-Scarce Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42126--42136},
}
E-3DPSM: A State Machine for Event-based Egocentric 3D Human Pose Estimation: Mayur Deshmukh,

Hiroyasu Akada,

Helge Rhodin,

Christian Theobalt,

Vladislav Golyanik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deshmukh_2026_CVPR,
  author    = {Deshmukh, Mayur and Akada, Hiroyasu and Rhodin, Helge and Theobalt, Christian and Golyanik, Vladislav},
  title     = {{E-3DPSM}: A State Machine for Event-based Egocentric {3D} Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14017--14026},
}
Semi-supervised Echocardiography Video Segmentation via Anchor Semantic Awareness and Continuous Pseudo-label Reforging: Yunpeng Fang,

Yimu Sun,

Jingxing Guo,

Huisi Wu,

Jing Qin; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Yunpeng and Sun, Yimu and Guo, Jingxing and Wu, Huisi and Qin, Jing},
  title     = {Semi-supervised Echocardiography Video Segmentation via Anchor Semantic Awareness and Continuous Pseudo-label Reforging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8535--8544},
}
Memory-Augmented Scene Understanding and Exploration for Open-World Aerial Object-Goal Navigation: Jiacong Zhou,

Jiaxu Miao,

Yourun Lin,

Xianyun Wang,

Jun Xiao,

Jun Yu; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Jiacong and Miao, Jiaxu and Lin, Yourun and Wang, Xianyun and Xiao, Jun and Yu, Jun},
  title     = {Memory-Augmented Scene Understanding and Exploration for Open-World Aerial Object-Goal Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21616--21626},
}
The Devil is in Attention Sharing: Improving Complex Non-rigid Image Editing Faithfulness via Attention Synergy: Zhuo Chen,

Fanyue Wei,

Runze Xu,

Jingjing Li,

Lixin Duan,

Angela Yao,

Wen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhuo and Wei, Fanyue and Xu, Runze and Li, Jingjing and Duan, Lixin and Yao, Angela and Li, Wen},
  title     = {The Devil is in Attention Sharing: Improving Complex Non-rigid Image Editing Faithfulness via Attention Synergy},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8237--8246},
}
GTR-Turbo: Merged Checkpoint is Secretly a Free Teacher for Agentic VLM Training: Tong Wei,

Yijun Yang,

Changhao Zhang,

Junliang Xing,

Yuanchun Shi,

Zongqing Lu,

Deheng Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Tong and Yang, Yijun and Zhang, Changhao and Xing, Junliang and Shi, Yuanchun and Lu, Zongqing and Ye, Deheng},
  title     = {{GTR-Turbo}: Merged Checkpoint is Secretly a Free Teacher for Agentic {VLM} Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26476--26486},
}
ReMoGen: Real-time Human Interaction-to-Reaction Generation via Modular Learning from Diverse Data: Yaoqin Ye,

Yiteng Xu,

Qin Sun,

Xinge Zhu,

Yujing Sun,

Yuexin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Yaoqin and Xu, Yiteng and Sun, Qin and Zhu, Xinge and Sun, Yujing and Ma, Yuexin},
  title     = {{ReMoGen}: Real-time Human Interaction-to-Reaction Generation via Modular Learning from Diverse Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16475--16485},
}
PALM: Progress-Aware Policy Learning via Affordance Reasoning for Long-Horizon Robotic Manipulation: Yuanzhe Liu,

Jingyuan Zhu,

Yuchen Mo,

Gen Li,

Xu Cao,

Jin Jin,

Yifan Shen,

Zhengyuan Li,

Tianjiao Yu,

Wenzhen Yuan,

Fangqiang Ding,

Ismini Lourentzou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuanzhe and Zhu, Jingyuan and Mo, Yuchen and Li, Gen and Cao, Xu and Jin, Jin and Shen, Yifan and Li, Zhengyuan and Yu, Tianjiao and Yuan, Wenzhen and Ding, Fangqiang and Lourentzou, Ismini},
  title     = {{PALM}: Progress-Aware Policy Learning via Affordance Reasoning for Long-Horizon Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28096--28110},
}
Decoupling Vision and Language: Codebook Anchored Visual Adaptation: Jason Wu,

Tianchen Zhao,

Chang Liu,

Jiarui Cai,

Zheng Zhang,

Zhuowei Li,

Aaditya Singh,

Xiang Xu,

Mani Srivastava,

Jonathan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Jason and Zhao, Tianchen and Liu, Chang and Cai, Jiarui and Zhang, Zheng and Li, Zhuowei and Singh, Aaditya and Xu, Xiang and Srivastava, Mani and Wu, Jonathan},
  title     = {Decoupling Vision and Language: Codebook Anchored Visual Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36957--36967},
}
DSFlash: Comprehensive Panoptic Scene Graph Generation in Realtime: Julian Lorenz,

Vladyslav Kovganko,

Elias Kohout,

Mrunmai Phatak,

Daniel Kienzle,

Rainer Lienhart; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lorenz_2026_CVPR,
  author    = {Lorenz, Julian and Kovganko, Vladyslav and Kohout, Elias and Phatak, Mrunmai and Kienzle, Daniel and Lienhart, Rainer},
  title     = {{DSFlash}: Comprehensive Panoptic Scene Graph Generation in Realtime},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17388--17398},
}
Reliev3R: Relieving Feed-forward 3D Reconstruction from Multi-View Geometric Annotations: Youyu Chen,

Junjun Jiang,

Yueru Luo,

Kui Jiang,

Xianming Liu,

Xu Yan,

Dave Zhenyu Chen; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Youyu and Jiang, Junjun and Luo, Yueru and Jiang, Kui and Liu, Xianming and Yan, Xu and Chen, Dave Zhenyu},
  title     = {{Reliev3R}: Relieving Feed-forward {3D} Reconstruction from Multi-View Geometric Annotations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21860--21869},
}
Forging a Dynamic Memory: Retrieval-Guided Continual Learning for Generalist Medical Foundation Models: Zizhi Chen,

Yizhen Gao,

Minghao Han,

Yizhou Liu,

Zhaoyu Chen,

Dingkang Yang,

Lihua Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zizhi and Gao, Yizhen and Han, Minghao and Liu, Yizhou and Chen, Zhaoyu and Yang, Dingkang and Zhang, Lihua},
  title     = {Forging a Dynamic Memory: Retrieval-Guided Continual Learning for Generalist Medical Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32309--32321},
}
OmniZip: Learning a Unified and Lightweight Lossless Compressor for Multi-Modal Data: Yan Zhao,

Zhengxue Cheng,

Junxuan Zhang,

Dajiang Zhou,

Qunshan Gu,

Qi Wang,

Li Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yan and Cheng, Zhengxue and Zhang, Junxuan and Zhou, Dajiang and Gu, Qunshan and Wang, Qi and Song, Li},
  title     = {{OmniZip}: Learning a Unified and Lightweight Lossless Compressor for Multi-Modal Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5337--5347},
}
AdapTok: Learning Adaptive and Temporally Causal Video Tokenization in a 1D Latent Space: Yan Li,

Changyao Tian,

Renqiu Xia,

Ning Liao,

Weiwei Guo,

Hongsheng Li,

Jifeng Dai,

Hao Li,

Xue Yang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yan and Tian, Changyao and Xia, Renqiu and Liao, Ning and Guo, Weiwei and Li, Hongsheng and Dai, Jifeng and Li, Hao and Yang, Xue},
  title     = {{AdapTok}: Learning Adaptive and Temporally Causal Video Tokenization in a {1D} Latent Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16163--16172},
}
Revisiting Visual Corruptions in LVLMs: A Shape-Texture Perspective on Model Failures: Xinkuan Qiu,

Meina Kan,

Zhenliang He,

Yongbin Zhou,

Shiguang Shan; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Xinkuan and Kan, Meina and He, Zhenliang and Zhou, Yongbin and Shan, Shiguang},
  title     = {Revisiting Visual Corruptions in {LVLMs}: A Shape-Texture Perspective on Model Failures},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40845--40854},
}
Seeing Conversations: Communication Context Identification in Egocentric Video: Tobias Dorszewski,

Jens Hjortkjær; [pdf]
[bibtex]
@InProceedings{Dorszewski_2026_CVPR,
  author    = {Dorszewski, Tobias and Hjortkj{\ae}r, Jens},
  title     = {Seeing Conversations: Communication Context Identification in Egocentric Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38816--38825},
}
ProjFlow: Projection Sampling with Flow Matching for Zero-Shot Exact Spatial Motion Control: Akihisa Watanabe,

Qing Yu,

Edgar Simo-Serra,

Kent Fujiwara; [pdf] [supp]
[bibtex]
@InProceedings{Watanabe_2026_CVPR,
  author    = {Watanabe, Akihisa and Yu, Qing and Simo-Serra, Edgar and Fujiwara, Kent},
  title     = {{ProjFlow}: Projection Sampling with Flow Matching for Zero-Shot Exact Spatial Motion Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2305--2315},
}
Attend Before Attention: Efficient and Scalable Video Understanding via Autoregressive Gazing: Baifeng Shi,

Stephanie Fu,

Long Lian,

Hanrong Ye,

David Eigen,

Aaron Reite,

Jan Kautz,

Boyi Li,

David M. Chan,

Trevor Darrell,

Pavlo Molchanov,

Hongxu Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Baifeng and Fu, Stephanie and Lian, Long and Ye, Hanrong and Eigen, David and Reite, Aaron and Kautz, Jan and Li, Boyi and Chan, David M. and Darrell, Trevor and Molchanov, Pavlo and Yin, Hongxu},
  title     = {Attend Before Attention: Efficient and Scalable Video Understanding via Autoregressive Gazing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17022--17034},
}
Diverse Video Generation with Determinantal Point Process-Guided Policy Optimization: Tahira Kazimi,

Connor Dunlop,

Pinar Yanardag; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kazimi_2026_CVPR,
  author    = {Kazimi, Tahira and Dunlop, Connor and Yanardag, Pinar},
  title     = {Diverse Video Generation with Determinantal Point Process-Guided Policy Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12839--12848},
}
Text-guided Feature Disentanglement for Cross-modal Gait Recognition: Zhiyang Lu,

Ming Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Zhiyang and Cheng, Ming},
  title     = {Text-guided Feature Disentanglement for Cross-modal Gait Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25484--25493},
}
Point Cloud as a Foreign Language for Multi-modal Large Language Model: Sneha Paul,

Zachary Patterson,

Nizar Bouguila; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Paul_2026_CVPR,
  author    = {Paul, Sneha and Patterson, Zachary and Bouguila, Nizar},
  title     = {Point Cloud as a Foreign Language for Multi-modal Large Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16676--16687},
}
Proxy-GS: Unified Occlusion Priors for Training and Inference in Structured 3D Gaussian Splatting: Yuanyuan Gao,

Yuning Gong,

Yifei Liu,

Jingfeng Li,

Dan Xu,

Yanci Zhang,

Dingwen Zhang,

Xiao Sun,

Zhihang Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yuanyuan and Gong, Yuning and Liu, Yifei and Li, Jingfeng and Xu, Dan and Zhang, Yanci and Zhang, Dingwen and Sun, Xiao and Zhong, Zhihang},
  title     = {{Proxy-GS}: Unified Occlusion Priors for Training and Inference in Structured {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7330--7339},
}
AnyPcc: Compressing Any Point Cloud with a Single Universal Model: Kangli Wang,

Qianxi Yi,

Yuqi Ye,

Shihao Li,

Wei Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Kangli and Yi, Qianxi and Ye, Yuqi and Li, Shihao and Gao, Wei},
  title     = {{AnyPcc}: Compressing Any Point Cloud with a Single Universal Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2972--2982},
}
InternData-A1: Pioneering High-Fidelity Synthetic Data for Pre-training Generalist Policy: Yang Tian,

Yuyin Yang,

Yiman Xie,

Zetao Cai,

Xu Shi,

Ning Gao,

Hangxu Liu,

Xuekun Jiang,

Zherui Qiu,

Feng Yuan,

Yaping Li,

Ping Wang,

Junhao Cai,

Jia Zeng,

Hao Dong,

Jiangmiao Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2026_CVPR,
  author    = {Tian, Yang and Yang, Yuyin and Xie, Yiman and Cai, Zetao and Shi, Xu and Gao, Ning and Liu, Hangxu and Jiang, Xuekun and Qiu, Zherui and Yuan, Feng and Li, Yaping and Wang, Ping and Cai, Junhao and Zeng, Jia and Dong, Hao and Pang, Jiangmiao},
  title     = {{InternData-A1}: Pioneering High-Fidelity Synthetic Data for Pre-training Generalist Policy},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {976--985},
}
SparseCam4D: Spatio-Temporally Consistent 4D Reconstruction from Sparse Cameras: Weihong Pan,

Xiaoyu Zhang,

Zhuang Zhang,

Zhichao Ye,

Nan Wang,

Haomin Liu,

Guofeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Pan_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Weihong and Zhang, Xiaoyu and Zhang, Zhuang and Ye, Zhichao and Wang, Nan and Liu, Haomin and Zhang, Guofeng},
  title     = {{SparseCam4D}: Spatio-Temporally Consistent {4D} Reconstruction from Sparse Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33237--33247},
}
Training-Free Open-Vocabulary Camouflaged Object Segmentation via Fine-Grained Object Binding and Adaptive Hybrid Prompt: Peng Ren,

Cheng Jiang,

Chuande Yang,

Fuming Sun,

Tian Bai; [pdf]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Peng and Jiang, Cheng and Yang, Chuande and Sun, Fuming and Bai, Tian},
  title     = {Training-Free Open-Vocabulary Camouflaged Object Segmentation via Fine-Grained Object Binding and Adaptive Hybrid Prompt},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24960--24969},
}
ART: Articulated Reconstruction Transformer: Zizhang Li,

Cheng Zhang,

Zhengqin Li,

Henry Howard-Jenkins,

Zhaoyang Lv,

Chen Geng,

Jiajun Wu,

Richard Newcombe,

Jakob Engel,

Zhao Dong; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zizhang and Zhang, Cheng and Li, Zhengqin and Howard-Jenkins, Henry and Lv, Zhaoyang and Geng, Chen and Wu, Jiajun and Newcombe, Richard and Engel, Jakob and Dong, Zhao},
  title     = {{ART}: Articulated Reconstruction Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7468--7479},
}
RAVEN: Radar Adaptive Vision Encoders for Efficient Chirp-wise Object Detection and Segmentation: Anuvab Sen,

Mir Sayeed Mohammad,

Saibal Mukhopadhyay; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sen_2026_CVPR,
  author    = {Sen, Anuvab and Mohammad, Mir Sayeed and Mukhopadhyay, Saibal},
  title     = {{RAVEN}: Radar Adaptive Vision Encoders for Efficient Chirp-wise Object Detection and Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17938--17947},
}
Learning to Diversify and Focus: A Reinforcement Framework for Open-Vocabulary HOI Detection: Yongchao Xu,

Jiawei Liu,

Junfeng Wang,

Sen Tao,

Na Jiang,

Zheng-Jun Zha; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Xu_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yongchao and Liu, Jiawei and Wang, Junfeng and Tao, Sen and Jiang, Na and Zha, Zheng-Jun},
  title     = {Learning to Diversify and Focus: A Reinforcement Framework for Open-Vocabulary {HOI} Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34673--34682},
}
SAIDO: Generalizable Detection of AI-Generated Images via Scene-Aware and Importance-Guided Dynamic Optimization in Continual Learning: Yongkang Hu,

Yu Cheng,

Yushuo Zhang,

Yuan Xie,

Zhaoxia Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Yongkang and Cheng, Yu and Zhang, Yushuo and Xie, Yuan and Yin, Zhaoxia},
  title     = {{SAIDO}: Generalizable Detection of {AI-Generated} Images via Scene-Aware and Importance-Guided Dynamic Optimization in Continual Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3876--3886},
}
Generative Video Motion Editing with 3D Point Tracks: Yao-Chih Lee,

Zhoutong Zhang,

Jiahui Huang,

Jui-Hsien Wang,

Joon-Young Lee,

Jia-Bin Huang,

Eli Shechtman,

Zhengqi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Yao-Chih and Zhang, Zhoutong and Huang, Jiahui and Wang, Jui-Hsien and Lee, Joon-Young and Huang, Jia-Bin and Shechtman, Eli and Li, Zhengqi},
  title     = {Generative Video Motion Editing with {3D} Point Tracks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18306--18318},
}
FireScope: Wildfire Risk Raster Prediction With a Chain-of-Thought Oracle: Mario Markov,

Stefan Ailuro,

Luc Van Gool,

Konrad Schindler,

Danda Pani Paudel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Markov_2026_CVPR,
  author    = {Markov, Mario and Ailuro, Stefan and Van Gool, Luc and Schindler, Konrad and Paudel, Danda Pani},
  title     = {{FireScope}: Wildfire Risk Raster Prediction With a Chain-of-Thought Oracle},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34795--34805},
}
Event6D: Event-based Novel Object 6D Pose Tracking: Jae-Young Kang,

Hoonhee Cho,

Taeyeop Lee,

Minjun Kang,

Bowen Wen,

Youngho Kim,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Jae-Young and Cho, Hoonhee and Lee, Taeyeop and Kang, Minjun and Wen, Bowen and Kim, Youngho and Yoon, Kuk-Jin},
  title     = {{Event6D}: Event-based Novel Object {6D} Pose Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15091--15104},
}
FLOW: Optimal Transport-Driven Feature Warping for Generalized Remote Physiological Measurement: Bo Zhao,

Junzhe Cao,

Dan Guo,

Dongmin Huang,

Wenjin Wang,

Tao Tan,

Yue Sun,

Zitong Yu; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Zhao_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Bo and Cao, Junzhe and Guo, Dan and Huang, Dongmin and Wang, Wenjin and Tan, Tao and Sun, Yue and Yu, Zitong},
  title     = {{FLOW}: Optimal Transport-Driven Feature Warping for Generalized Remote Physiological Measurement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28481--28491},
}
Region-Wise Correspondence Prediction between Manga Line Art Images: Yingxuan Li,

Jiafeng Mao,

Qianru Qiu,

Yusuke Matsui; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yingxuan and Mao, Jiafeng and Qiu, Qianru and Matsui, Yusuke},
  title     = {Region-Wise Correspondence Prediction between Manga Line Art Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15334--15342},
}
Deciphering Genotype-Phenotype Mechanisms from High-Content Profiling via Knowledge-Guided Multi-modal Graph Learning: Hanjing Lin,

Jiahua Rao,

Youhan Sun,

Jiancong Xie,

Yuedong Yang; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Lin_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Hanjing and Rao, Jiahua and Sun, Youhan and Xie, Jiancong and Yang, Yuedong},
  title     = {Deciphering Genotype-Phenotype Mechanisms from High-Content Profiling via Knowledge-Guided Multi-modal Graph Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41804--41814},
}
Occluded Human Body Capture with Frequency Domain Denoising Prior: Buzhen Huang,

Chongyang Xu,

Wentao Tang,

Yuan Shu,

Jingyi Ju,

Binghui Zuo,

Yangang Wang; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Huang_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Buzhen and Xu, Chongyang and Tang, Wentao and Shu, Yuan and Ju, Jingyi and Zuo, Binghui and Wang, Yangang},
  title     = {Occluded Human Body Capture with Frequency Domain Denoising Prior},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13930--13939},
}
Unsupervised Multi-agent and Single-agent Perception from Cooperative Views: Haochen Yang,

Baolu Li,

Lei Li,

Delin Ren,

Jiacheng Guo,

Minghai Qin,

Tianyun Zhang,

Hongkai Yu; [pdf] [arXiv]
[bibtex]
% NOTE(review): citation key Yang_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Haochen and Li, Baolu and Li, Lei and Ren, Delin and Guo, Jiacheng and Qin, Minghai and Zhang, Tianyun and Yu, Hongkai},
  title     = {Unsupervised Multi-agent and Single-agent Perception from Cooperative Views},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25810--25819},
}
MeanFuser: Fast One-Step Multi-Modal Trajectory Generation and Adaptive Reconstruction via MeanFlow for End-to-End Autonomous Driving: Junli Wang,

Yinan Zheng,

Xueyi Liu,

Zebin Xing,

Pengfei Li,

Kun Ma,

Hangjun Ye,

Guang Chen,

Guang Li,

Long Chen,

Zhongpu Xia,

Qichao Zhang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Wang_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Junli and Zheng, Yinan and Liu, Xueyi and Xing, Zebin and Li, Pengfei and Ma, Kun and Ye, Hangjun and Chen, Guang and Li, Guang and Chen, Long and Xia, Zhongpu and Zhang, Qichao},
  title     = {{MeanFuser}: Fast One-Step Multi-Modal Trajectory Generation and Adaptive Reconstruction via {MeanFlow} for End-to-End Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17884--17893},
}
FedBPrompt: Federated Domain Generalization Person Re-Identification via Body Distribution Aware Visual Prompts: Xin Xu,

Weilong Li,

Wei Liu,

Wenke Huang,

Zhixi Yu,

Bin Yang,

Xiaoying Liao,

Kui Jiang; [pdf] [arXiv]
[bibtex]
% NOTE(review): citation key Xu_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Xin and Li, Weilong and Liu, Wei and Huang, Wenke and Yu, Zhixi and Yang, Bin and Liao, Xiaoying and Jiang, Kui},
  title     = {{FedBPrompt}: Federated Domain Generalization Person Re-Identification via Body Distribution Aware Visual Prompts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40448--40457},
}
MedCLIPSeg: Probabilistic Vision-Language Adaptation for Data-Efficient and Generalizable Medical Image Segmentation: Taha Koleilat,

Hojat Asgariandehkordi,

Omid Nejatimanzari,

Berardino Barile,

Yiming Xiao,

Hassan Rivaz; [pdf] [supp]
[bibtex]
@InProceedings{Koleilat_2026_CVPR,
  author    = {Koleilat, Taha and Asgariandehkordi, Hojat and Nejatimanzari, Omid and Barile, Berardino and Xiao, Yiming and Rivaz, Hassan},
  title     = {{MedCLIPSeg}: Probabilistic Vision-Language Adaptation for Data-Efficient and Generalizable Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1406--1417},
}
CLIP-like Model as a Foundational Density Ratio Estimator: Fumiya Uchiyama,

Rintaro Yanagi,

Shohei Taniguchi,

Shota Takashiro,

Masahiro Suzuki,

Hirokatsu Kataoka,

Yusuke Iwasawa,

Yutaka Matsuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Uchiyama_2026_CVPR,
  author    = {Uchiyama, Fumiya and Yanagi, Rintaro and Taniguchi, Shohei and Takashiro, Shota and Suzuki, Masahiro and Kataoka, Hirokatsu and Iwasawa, Yusuke and Matsuo, Yutaka},
  title     = {{CLIP-like} Model as a Foundational Density Ratio Estimator},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15784--15793},
}
Edges Compete for Trust: Group Relative Edge Optimization for Building Reconstruction from Point Clouds: Yujun Liu,

Ruisheng Wang,

Xiang Ao,

Haoyuan Shen,

Kuihao Wang,

Kun Zhou,

Qingquan Li; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Liu_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yujun and Wang, Ruisheng and Ao, Xiang and Shen, Haoyuan and Wang, Kuihao and Zhou, Kun and Li, Qingquan},
  title     = {Edges Compete for Trust: Group Relative Edge Optimization for Building Reconstruction from Point Clouds},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17121--17131},
}
CycleManip: Enabling Cycle-based Manipulation via Effective History Perception and Understanding: Yi-Lin Wei,

Haoran Liao,

Yuhao Lin,

Pengyue Wang,

Zhizhao Liang,

Guiliang Liu,

Wei-Shi Zheng; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Wei_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Yi-Lin and Liao, Haoran and Lin, Yuhao and Wang, Pengyue and Liang, Zhizhao and Liu, Guiliang and Zheng, Wei-Shi},
  title     = {{CycleManip}: Enabling Cycle-based Manipulation via Effective History Perception and Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20780--20789},
}
ARMFlow: AutoRegressive MeanFlow for Online 3D Human Reaction Generation: Zichen Geng,

Zeeshan Hayder,

Wei Liu,

Hesheng Wang,

Ajmal Saeed Mian; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Geng_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Geng_2026_CVPR,
  author    = {Geng, Zichen and Hayder, Zeeshan and Liu, Wei and Wang, Hesheng and Mian, Ajmal Saeed},
  title     = {{ARMFlow}: {AutoRegressive} {MeanFlow} for Online {3D} Human Reaction Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30718--30728},
}
Decoupled and Reusable Adaptation for Efficient Cross-Modal Transfer: Yajing Liu,

Yumeng Zhang,

Yue Si,

Baojie Fan,

Jiandong Tian; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Liu_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yajing and Zhang, Yumeng and Si, Yue and Fan, Baojie and Tian, Jiandong},
  title     = {Decoupled and Reusable Adaptation for Efficient Cross-Modal Transfer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {812--822},
}
Olbedo: An Albedo and Shading Aerial Dataset for Large-Scale Outdoor Environments: Shuang Song,

Debao Huang,

Deyan Deng,

Haolin Xiong,

Yang Tang,

Yajie Zhao,

Rongjun Qin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Shuang and Huang, Debao and Deng, Deyan and Xiong, Haolin and Tang, Yang and Zhao, Yajie and Qin, Rongjun},
  title     = {{Olbedo}: An Albedo and Shading Aerial Dataset for Large-Scale Outdoor Environments},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6474--6483},
}
UnityVideo: Unified Multi-Modal Multi-Task Learning for Enhancing World-Aware Video Generation: Jiehui Huang,

Yuechen Zhang,

Xu He,

Yuan Gao,

Zhi Cen,

Bin Xia,

Yan Zhou,

Xin Tao,

Pengfei Wan,

Jiaya Jia; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Huang_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Jiehui and Zhang, Yuechen and He, Xu and Gao, Yuan and Cen, Zhi and Xia, Bin and Zhou, Yan and Tao, Xin and Wan, Pengfei and Jia, Jiaya},
  title     = {{UnityVideo}: Unified Multi-Modal Multi-Task Learning for Enhancing World-Aware Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4471--4481},
}
Wave-Former: Through-Occlusion 3D Reconstruction via Wireless Shape Completion: Laura Dodds,

Maisy Lam,

Waleed Akbar,

Yibo Cheng,

Fadel Adib; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dodds_2026_CVPR,
  author    = {Dodds, Laura and Lam, Maisy and Akbar, Waleed and Cheng, Yibo and Adib, Fadel},
  title     = {{Wave-Former}: Through-Occlusion {3D} Reconstruction via Wireless Shape Completion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21713--21724},
}
FUSAR-GPT: A Spatiotemporal Feature-Embedded and Two-Stage Decoupled Visual Language Model for SAR Imagery: Xiaokun Zhang,

Yi Yang,

Ziqi Ye,

Baiyun Baiyun,

Xiaorong Guo,

Qingchen Fang,

Ruyi Zhang,

Xinpeng Zhou,

Haipeng Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhang_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xiaokun and Yang, Yi and Ye, Ziqi and Baiyun, Baiyun and Guo, Xiaorong and Fang, Qingchen and Zhang, Ruyi and Zhou, Xinpeng and Wang, Haipeng},
  title     = {{FUSAR-GPT}: A Spatiotemporal Feature-Embedded and Two-Stage Decoupled Visual Language Model for {SAR} Imagery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42158--42168},
}
IrisFP: Adversarial-Example-based Model Fingerprinting with Enhanced Uniqueness and Robustness: Ziye Geng,

Guang Yang,

Yihang Chen,

Changqing Luo; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Geng_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Geng_2026_CVPR,
  author    = {Geng, Ziye and Yang, Guang and Chen, Yihang and Luo, Changqing},
  title     = {{IrisFP}: Adversarial-Example-based Model Fingerprinting with Enhanced Uniqueness and Robustness},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39383--39392},
}
ELVIS: Enhance Low-Light for Video Instance Segmentation in the Dark: Joanne Lin,

Ruirui Lin,

Yini Li,

David Bull,

Nantheera Anantrasirichai; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Lin_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Joanne and Lin, Ruirui and Li, Yini and Bull, David and Anantrasirichai, Nantheera},
  title     = {{ELVIS}: Enhance Low-Light for Video Instance Segmentation in the Dark},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25011--25021},
}
End-to-End Hyper-Relational Information Extraction for Engineering Diagrams via Dynamically Tokenized Relation Transformer: Tianyou Bai,

Yan-Ming Zhang,

Zixiang Zhang,

Jibin Zhou,

Fei Yin,

Cheng-Lin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Tianyou and Zhang, Yan-Ming and Zhang, Zixiang and Zhou, Jibin and Yin, Fei and Liu, Cheng-Lin},
  title     = {End-to-End Hyper-Relational Information Extraction for Engineering Diagrams via Dynamically Tokenized Relation Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24438--24448},
}
CrossHOI-Bench: A Unified Benchmark for HOI Evaluation across Vision-Language Models and HOI-Specific Methods: Qinqian Lei,

Bo Wang,

Robby T. Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2026_CVPR,
  author    = {Lei, Qinqian and Wang, Bo and Tan, Robby T.},
  title     = {{CrossHOI-Bench}: A Unified Benchmark for {HOI} Evaluation across Vision-Language Models and {HOI-Specific} Methods},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38520--38531},
}
SkySense-VITA: Towards Universal In-context Segmentation of Multi-modal Remote Sensing Imagery: Kang Wu,

Lei Yu,

Junwei Luo,

Bo Dang,

Junjian Zhang,

Xiangyuan Cai,

Hongwei Hu,

Jingdong Chen,

Yansheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Kang and Yu, Lei and Luo, Junwei and Dang, Bo and Zhang, Junjian and Cai, Xiangyuan and Hu, Hongwei and Chen, Jingdong and Li, Yansheng},
  title     = {{SkySense-VITA}: Towards Universal In-context Segmentation of Multi-modal Remote Sensing Imagery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20553--20563},
}
Drift-Resilient Temporal Priors for Visual Tracking: Yuqing Huang,

Liting Lin,

Weijun Zhuang,

Zhenyu He,

Xin Li; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Huang_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Yuqing and Lin, Liting and Zhuang, Weijun and He, Zhenyu and Li, Xin},
  title     = {Drift-Resilient Temporal Priors for Visual Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6847--6856},
}
MuKV: Multi-Grained KV Cache Compression for Long Streaming Video Question-Answering: Junbin Xiao,

Jiajun Chen,

Tianxiang Sun,

Xun Yang,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Xiao_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Junbin and Chen, Jiajun and Sun, Tianxiang and Yang, Xun and Yao, Angela},
  title     = {{MuKV}: Multi-Grained {KV} Cache Compression for Long Streaming Video Question-Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11381--11391},
}
Phrase-grounded APO for Improving Chest X-ray Report Generation: Raziuddin Mahmood,

Tanveer Syeda-Mahmood; [pdf]
[bibtex]
@InProceedings{Mahmood_2026_CVPR,
  author    = {Mahmood, Raziuddin and Syeda-Mahmood, Tanveer},
  title     = {Phrase-grounded {APO} for Improving Chest {X-ray} Report Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28254--28263},
}
Unleashing Stealthy Backdoor Pandemic by Infecting a Single Diffusion Model: Mohaiminul Al Nahian,

Abeer Matar Almalky,

Sabbir Ahmed,

Abdullah Al Arafat,

Mamshad Nayeem Rizve,

Adnan Siraj Rakin; [pdf] [supp]
[bibtex]
@InProceedings{Al_Nahian_2026_CVPR,
  author    = {Al Nahian, Mohaiminul and Almalky, Abeer Matar and Ahmed, Sabbir and Al Arafat, Abdullah and Rizve, Mamshad Nayeem and Rakin, Adnan Siraj},
  title     = {Unleashing Stealthy Backdoor Pandemic by Infecting a Single Diffusion Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34889--34899},
}
Joint-Aligned Latent Action: Towards Scalable VLA Pretraining in the Wild: Hao Luo,

Ye Wang,

Wanpeng Zhang,

Haoqi Yuan,

Yicheng Feng,

Haiweng Xu,

Sipeng Zheng,

Zongqing Lu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Luo_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Hao and Wang, Ye and Zhang, Wanpeng and Yuan, Haoqi and Feng, Yicheng and Xu, Haiweng and Zheng, Sipeng and Lu, Zongqing},
  title     = {Joint-Aligned Latent Action: Towards Scalable {VLA} Pretraining in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35047--35058},
}
SIGMA: Selective-Interleaved Generation with Multi-Attribute Tokens: Xiaoyan Zhang,

Zechen Bai,

Haofan Wang,

Yiren Song; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhang_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xiaoyan and Bai, Zechen and Wang, Haofan and Song, Yiren},
  title     = {{SIGMA}: Selective-Interleaved Generation with Multi-Attribute Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38165--38175},
}
TaskForce: Cooperative Multi-agent Reinforcement Learning for Multi-task Optimization: Wonhyeok Choi,

Kyumin Hwang,

Jihun Park,

Kyoungmin Lee,

Seunghun Lee,

Jaeyeul Kim,

Minwoo Choi,

Sunghoon Im; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Wonhyeok and Hwang, Kyumin and Park, Jihun and Lee, Kyoungmin and Lee, Seunghun and Kim, Jaeyeul and Choi, Minwoo and Im, Sunghoon},
  title     = {{TaskForce}: Cooperative Multi-agent Reinforcement Learning for Multi-task Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36871--36880},
}
MMLandmarks: a Cross-View Instance-Level Benchmark for Geo-Spatial Understanding: Oskar Kristoffersen,

Alba Reinders Sánchez,

Morten Rieger Hannemose,

Anders Bjorholm Dahl,

Dim P. Papadopoulos; [pdf] [supp]
[bibtex]
% NOTE(review): the listing shows this author as "Alba Reinders Sánchez"; confirm whether the surname should be the compound "Reinders S{\'a}nchez".
@InProceedings{Kristoffersen_2026_CVPR,
  author    = {Kristoffersen, Oskar and S{\'a}nchez, Alba Reinders and Hannemose, Morten Rieger and Dahl, Anders Bjorholm and Papadopoulos, Dim P.},
  title     = {{MMLandmarks}: a Cross-View Instance-Level Benchmark for Geo-Spatial Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26452--26464},
}
Flow Matching for Multimodal Distributions: Gaoxiang Luo,

Frank Cole,

Sihang Zhang,

Yuxiang Wan,

Yulong Lu,

Ju Sun; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Luo_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Gaoxiang and Cole, Frank and Zhang, Sihang and Wan, Yuxiang and Lu, Yulong and Sun, Ju},
  title     = {Flow Matching for Multimodal Distributions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23260--23271},
}
From Attraction to Equilibrium: Physics-Inspired Semantic Gravitons for Zero-Shot Anomaly Detection: Yuwen Pan,

Yuan Wang,

Shaohui Li,

Zhi Li,

Yu Liu,

You He; [pdf]
[bibtex]
% NOTE(review): citation key Pan_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Yuwen and Wang, Yuan and Li, Shaohui and Li, Zhi and Liu, Yu and He, You},
  title     = {From Attraction to Equilibrium: Physics-Inspired Semantic Gravitons for Zero-Shot Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35628--35637},
}
Spatia: Video Generation with Updatable Spatial Memory: Jinjing Zhao,

Fangyun Wei,

Zhening Liu,

Hongyang Zhang,

Chang Xu,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhao_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Jinjing and Wei, Fangyun and Liu, Zhening and Zhang, Hongyang and Xu, Chang and Lu, Yan},
  title     = {{Spatia}: Video Generation with Updatable Spatial Memory},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4245--4257},
}
SegGBC: Justifiable Coarse-to-Fine Granular-Ball Computing for Enhancing Clustering Image Segmentation: Qianpeng Chong,

Wenyi Zeng,

Xiuxuan Shen,

Jiajie Li,

Qian Yin,

Xin Zheng; [pdf]
[bibtex]
@InProceedings{Chong_2026_CVPR,
  author    = {Chong, Qianpeng and Zeng, Wenyi and Shen, Xiuxuan and Li, Jiajie and Yin, Qian and Zheng, Xin},
  title     = {{SegGBC}: Justifiable Coarse-to-Fine Granular-Ball Computing for Enhancing Clustering Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42104--42114},
}
GaussianMatch: Semi-Supervised Regression with Pseudo-Label Filtering via Multi-View Gaussian Consistency: Yin Wang,

Hao Lu,

Zixuan Wang,

Zhen Qin,

Li Kuang,

Mengchu Zhou,

Shuiguang Deng; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Wang_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yin and Lu, Hao and Wang, Zixuan and Qin, Zhen and Kuang, Li and Zhou, Mengchu and Deng, Shuiguang},
  title     = {{GaussianMatch}: Semi-Supervised Regression with Pseudo-Label Filtering via Multi-View {Gaussian} Consistency},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31985--31994},
}
Fresco: Frequency-Spatial Consistent Optimization for Fine-Grained Head Avatar Modeling: Shikun Zhang,

Yong Li,

Yiqun Wang,

Qiuhong Ke,

Cunjian Chen; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Zhang_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Shikun and Li, Yong and Wang, Yiqun and Ke, Qiuhong and Chen, Cunjian},
  title     = {{Fresco}: Frequency-Spatial Consistent Optimization for Fine-Grained Head Avatar Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40130--40139},
}
Long-RVOS: A Comprehensive Benchmark for Long-term Referring Video Object Segmentation: Tianming Liang,

Haichao Jiang,

Yuting Yang,

Chaolei Tan,

Shuai Li,

Wei-Shi Zheng,

Jian-Fang Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Tianming and Jiang, Haichao and Yang, Yuting and Tan, Chaolei and Li, Shuai and Zheng, Wei-Shi and Hu, Jian-Fang},
  title     = {{Long-RVOS}: A Comprehensive Benchmark for Long-term Referring Video Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39497--39507},
}
ORV: 4D Occupancy-centric Robot Video Generation: Xiuyu Yang,

Bohan Li,

Shaocong Xu,

Nan Wang,

Chongjie Ye,

Zhaoxi Chen,

Minghan Qin,

Yikang Ding,

Zheng Zhu,

Xin Jin,

Hang Zhao,

Hao Zhao; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Yang_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xiuyu and Li, Bohan and Xu, Shaocong and Wang, Nan and Ye, Chongjie and Chen, Zhaoxi and Qin, Minghan and Ding, Yikang and Zhu, Zheng and Jin, Xin and Zhao, Hang and Zhao, Hao},
  title     = {{ORV}: {4D} Occupancy-centric Robot Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1053--1066},
}
ParkGaussian: Surround-view 3D Gaussian Splatting for Autonomous Parking: Xiaobao Wei,

Zhangjie Ye,

Yuxiang Gu,

Zunjie Zhu,

Yunfei Guo,

Yingying Shen,

Shan Zhao,

Ming Lu,

Haiyang Sun,

Bing Wang,

Guang Chen,

Rongfeng Lu,

Hangjun Ye; [pdf] [arXiv]
[bibtex]
% NOTE(review): citation key Wei_2026_CVPR is shared by other entries in this file; disambiguate before loading them into one database.
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Xiaobao and Ye, Zhangjie and Gu, Yuxiang and Zhu, Zunjie and Guo, Yunfei and Shen, Yingying and Zhao, Shan and Lu, Ming and Sun, Haiyang and Wang, Bing and Chen, Guang and Lu, Rongfeng and Ye, Hangjun},
  title     = {{ParkGaussian}: Surround-view {3D} {Gaussian} Splatting for Autonomous Parking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19085--19095},
}
Hearing the Room Through the Shape of the Drum: Modal-Guided Sound Recovery from Multi-Point Surface Vibrations: Shai Bagon,

Matan Kichler,

Mark Sheinin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bagon_2026_CVPR,
  author    = {Bagon, Shai and Kichler, Matan and Sheinin, Mark},
  title     = {Hearing the Room Through the Shape of the Drum: Modal-Guided Sound Recovery from Multi-Point Surface Vibrations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14451--14460},
}
Curvature-Aware Captioning: Leveraging Geodesic Attention for 3D Scene Understanding: Ziyao He,

Yingjie Liu,

Zhang Yangrui,

Mingsong Chen,

Xuan Tang,

Xian Wei; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Ziyao and Liu, Yingjie and Yangrui, Zhang and Chen, Mingsong and Tang, Xuan and Wei, Xian},
  title     = {Curvature-Aware Captioning: Leveraging Geodesic Attention for {3D} Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30997--31007},
}
Personalized Image Descriptions from Attention Sequences: Ruoyu Xue,

Hieu Le,

Jingyi Xu,

Sounak Mondal,

Abe Leite,

Gregory Zelinsky,

Minh Hoai,

Dimitris Samaras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR,
  author    = {Xue, Ruoyu and Le, Hieu and Xu, Jingyi and Mondal, Sounak and Leite, Abe and Zelinsky, Gregory and Hoai, Minh and Samaras, Dimitris},
  title     = {Personalized Image Descriptions from Attention Sequences},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40042--40052},
}
RiskProp: Collision-Anchored Self-Supervised Risk Propagation For Early Accident Anticipation: Yiyang Zou,

Tianhao Zhao,

Peilun Xiao,

Hongyu Jin,

Longyu Qi,

Yuxuan Li,

Liyin Liang,

Yifeng Qian,

Chunbo Lai,

Yutian Lin,

Zhihui Li,

Yu Wu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Yiyang and Zhao, Tianhao and Xiao, Peilun and Jin, Hongyu and Qi, Longyu and Li, Yuxuan and Liang, Liyin and Qian, Yifeng and Lai, Chunbo and Lin, Yutian and Li, Zhihui and Wu, Yu},
  title     = {{RiskProp}: Collision-Anchored Self-Supervised Risk Propagation For Early Accident Anticipation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2768--2777},
}
MER-Tracker: Towards High-Speed 3D Point Tracking via Multi-View Event-RGB Hybrid Cameras: Yiqian Chang,

Qinghong Ye,

Haoran Xu,

Jianing Li,

Dongyang Ma,

Xuan Wang,

Wei Zhang,

Yonghong Tian,

Peixi Peng; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2026_CVPR,
    author    = {Chang, Yiqian and Ye, Qinghong and Xu, Haoran and Li, Jianing and Ma, Dongyang and Wang, Xuan and Zhang, Wei and Tian, Yonghong and Peng, Peixi},
    title     = {{MER-Tracker}: Towards High-Speed {3D} Point Tracking via Multi-View {Event-RGB} Hybrid Cameras},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37032--37042}
}
OVSegDT: Segmenting Transformer for Open-Vocabulary Object Goal Navigation: Tatiana Zemskova,

Aleksei Staroverov,

Dmitry Yudin,

Aleksandr Panov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zemskova_2026_CVPR,
    author    = {Zemskova, Tatiana and Staroverov, Aleksei and Yudin, Dmitry and Panov, Aleksandr},
    title     = {{OVSegDT}: Segmenting Transformer for Open-Vocabulary Object Goal Navigation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {8120--8129}
}
Learning to Reason in 4D: Dynamic Spatial Understanding for Vision Language Models: Shengchao Zhou,

Yuxin Chen,

Yuying Ge,

Wei Huang,

Jiehong Lin,

Ying Shan,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Shengchao and Chen, Yuxin and Ge, Yuying and Huang, Wei and Lin, Jiehong and Shan, Ying and Qi, Xiaojuan},
    title     = {Learning to Reason in {4D}: Dynamic Spatial Understanding for Vision Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {9637--9646}
}
MorphAny3D: Unleashing the Power of Structured Latent in 3D Morphing: Xiaokun Sun,

Zeyu Cai,

Hao Tang,

Ying Tai,

Jian Yang,

Zhenyu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Xiaokun and Cai, Zeyu and Tang, Hao and Tai, Ying and Yang, Jian and Zhang, Zhenyu},
    title     = {{MorphAny3D}: Unleashing the Power of Structured Latent in {3D} Morphing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {27018--27029}
}
THE MORE, THE MERRIER: CONTRASTIVE FUSION FOR HIGHER-ORDER MULTIMODAL ALIGNMENT: Stefanos Koutoupis,

Michaela Areti Zervou,

Konstantinos Kontras,

Maarten De Vos,

Panagiotis Tsakalides,

Grigorios Tsagkatakis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koutoupis_2026_CVPR,
    author    = {Koutoupis, Stefanos and Zervou, Michaela Areti and Kontras, Konstantinos and De Vos, Maarten and Tsakalides, Panagiotis and Tsagkatakis, Grigorios},
    title     = {THE MORE, THE MERRIER: CONTRASTIVE FUSION FOR HIGHER-ORDER MULTIMODAL ALIGNMENT},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {8825--8835}
}
Push-and-Step: From RL-Based Balance Recovery to Physical Simulation of Dense Crowds: Alexis Jensen,

Pei Xu,

Ioannis Karamouzas,

Charles Pontonnier,

Julien Pettré; [pdf] [supp]
[bibtex]
@InProceedings{Jensen_2026_CVPR,
    author    = {Jensen, Alexis and Xu, Pei and Karamouzas, Ioannis and Pontonnier, Charles and Pettr{\'e}, Julien},
    title     = {Push-and-Step: From {RL-Based} Balance Recovery to Physical Simulation of Dense Crowds},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16387--16397}
}
Beyond Global Similarity: Multi-Conditional Retrieval for Fine-Grained Cross-Modal Understanding: Xuan Lu,

Kangle Li,

Haohang Huang,

Rui Meng,

Wenjun Zeng,

Xiaoyu Shen; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
    author    = {Lu, Xuan and Li, Kangle and Huang, Haohang and Meng, Rui and Zeng, Wenjun and Shen, Xiaoyu},
    title     = {Beyond Global Similarity: Multi-Conditional Retrieval for Fine-Grained Cross-Modal Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {9699--9709}
}
HFR and HDR Video from Multi-Attenuated Spikes Using a Rapidly Rotating SpokeND Filter: Yakun Chang,

Zhaojun Huang,

Siqi Yang,

Yeliduosi Xiaokaiti,

Shikui Wei,

Yao Zhao,

Tiejun Huang,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2026_CVPR,
    author    = {Chang, Yakun and Huang, Zhaojun and Yang, Siqi and Xiaokaiti, Yeliduosi and Wei, Shikui and Zhao, Yao and Huang, Tiejun and Shi, Boxin},
    title     = {{HFR} and {HDR} Video from Multi-Attenuated Spikes Using a Rapidly Rotating {SpokeND} Filter},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19716--19725}
}
Experience Transfer for Multimodal LLM Agents in Minecraft Game: Chenghao Li,

Jun Liu,

Songbo Zhang,

Huadong Jian,

Hao Ni,

Lik-Hang Lee,

Sung-Ho Bae,

Guoqing Wang,

Yang Yang,

Chaoning Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Chenghao and Liu, Jun and Zhang, Songbo and Jian, Huadong and Ni, Hao and Lee, Lik-Hang and Bae, Sung-Ho and Wang, Guoqing and Yang, Yang and Zhang, Chaoning},
    title     = {Experience Transfer for Multimodal {LLM} Agents in {Minecraft} Game},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37143--37153}
}
SCE-SLAM: Scale-Consistent Monocular SLAM via Scene Coordinate Embeddings: Yuchen Wu,

Jiahe Li,

Xiaohan Yu,

Lina Yu,

Jin Zheng,

Xiao Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Yuchen and Li, Jiahe and Yu, Xiaohan and Yu, Lina and Zheng, Jin and Bai, Xiao},
    title     = {{SCE-SLAM}: Scale-Consistent Monocular {SLAM} via Scene Coordinate Embeddings},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {7480--7490}
}
FEAST: Fully Connected Expressive Attention for Spatial Transcriptomics: Taejin Jeong,

Joohyeok Kim,

Jinyeong Kim,

Chanyoung Kim,

Seong Jae Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
    author    = {Jeong, Taejin and Kim, Joohyeok and Kim, Jinyeong and Kim, Chanyoung and Hwang, Seong Jae},
    title     = {{FEAST}: Fully Connected Expressive Attention for Spatial Transcriptomics},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26793--26802}
}
HeSS: Head Sensitivity Score for Sparsity Redistribution in VGGT: Yongsung Kim,

Wooseok Song,

Jaihyun Lew,

Hun Hwangbo,

Jaehoon Lee,

Sungroh Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
    author    = {Kim, Yongsung and Song, Wooseok and Lew, Jaihyun and Hwangbo, Hun and Lee, Jaehoon and Yoon, Sungroh},
    title     = {{HeSS}: Head Sensitivity Score for Sparsity Redistribution in {VGGT}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36509--36517}
}
InterAgent: Physics-based Multi-agent Command Execution via Diffusion on Interaction Graphs: Bin Li,

Ruichi Zhang,

Han Liang,

Jingyan Zhang,

Juze Zhang,

Xin Chen,

Lan Xu,

Jingyi Yu,

Jingya Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Bin and Zhang, Ruichi and Liang, Han and Zhang, Jingyan and Zhang, Juze and Chen, Xin and Xu, Lan and Yu, Jingyi and Wang, Jingya},
    title     = {{InterAgent}: Physics-based Multi-agent Command Execution via Diffusion on Interaction Graphs},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15253--15265}
}
Towards Dynamic Modality Alignment in Multimodal Continual Learning: Jiayao Tan,

Fan Lyu,

Tianle Liu,

Fuyuan Hu,

Wei Feng; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2026_CVPR,
    author    = {Tan, Jiayao and Lyu, Fan and Liu, Tianle and Hu, Fuyuan and Feng, Wei},
    title     = {Towards Dynamic Modality Alignment in Multimodal Continual Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39911--39921}
}
Test-Time Perturbation Tuning with Delayed Feedback for Vision-Language-Action Models: Zehua Zang,

Xi Wang,

Fuchun Sun,

Xiao Xu,

Lixiang Liu,

Jiahuan Zhou,

Jiangmeng Li; [pdf] [supp]
[bibtex]
@InProceedings{Zang_2026_CVPR,
    author    = {Zang, Zehua and Wang, Xi and Sun, Fuchun and Xu, Xiao and Liu, Lixiang and Zhou, Jiahuan and Li, Jiangmeng},
    title     = {Test-Time Perturbation Tuning with Delayed Feedback for Vision-Language-Action Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {8110--8119}
}
SonoWorld: From One Image to a 3D Audio-Visual Scene: Derong Jin,

Xiyi Chen,

Ming C. Lin,

Ruohan Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
    author    = {Jin, Derong and Chen, Xiyi and Lin, Ming C. and Gao, Ruohan},
    title     = {{SonoWorld}: From One Image to a {3D} Audio-Visual Scene},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30194--30204}
}
AREA3D: Active Reconstruction Agent with Unified Feed-Forward 3D Perception and Vision-Language Guidance: Tianling Xu,

Shengzhe Gan,

Leslie Gu,

Yuelei Li,

Fangneng Zhan,

Hanspeter Pfister; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Tianling and Gan, Shengzhe and Gu, Leslie and Li, Yuelei and Zhan, Fangneng and Pfister, Hanspeter},
    title     = {{AREA3D}: Active Reconstruction Agent with Unified Feed-Forward {3D} Perception and Vision-Language Guidance},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37133--37142}
}
Towards High-resolution and Disentangled Reference-based Sketch Colorization: Dingkun Yan,

Xinrui Wang,

Ru Wang,

Zhuoru Li,

Jinze Yu,

Yusuke Iwasawa,

Yutaka Matsuo,

Jiaxian Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
    author    = {Yan, Dingkun and Wang, Xinrui and Wang, Ru and Li, Zhuoru and Yu, Jinze and Iwasawa, Yusuke and Matsuo, Yutaka and Guo, Jiaxian},
    title     = {Towards High-resolution and Disentangled Reference-based Sketch Colorization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11472--11481}
}
Detect Any AI-Counterfeited Text Image: Chenfan Qu,

Yiwu Zhong,

Xuekang Zhu,

Junchi Li,

Changjiang Jiang,

Jian Liu,

Lianwen Jin; [pdf] [supp]
[bibtex]
@InProceedings{Qu_2026_CVPR,
    author    = {Qu, Chenfan and Zhong, Yiwu and Zhu, Xuekang and Li, Junchi and Jiang, Changjiang and Liu, Jian and Jin, Lianwen},
    title     = {Detect Any {AI-Counterfeited} Text Image},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {35437--35450}
}
ExpoCM: Exposure-Aware One-Step Generative Single-Image HDR Reconstruction: Aoyu Liu,

Zhen Liu,

Ziyi Wang,

Dian Chen,

Bing Zeng,

Shuaicheng Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Aoyu and Liu, Zhen and Wang, Ziyi and Chen, Dian and Zeng, Bing and Liu, Shuaicheng},
    title     = {{ExpoCM}: Exposure-Aware One-Step Generative Single-Image {HDR} Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {29909--29918}
}
UniDex: A Robot Foundation Suite for Universal Dexterous Hand Control from Egocentric Human Videos: Gu Zhang,

Qicheng Xu,

Haozhe Zhang,

Jianhan Ma,

Long He,

Yiming Bao,

Zeyu Ping,

Zhecheng Yuan,

Chenhao Lu,

Chengbo Yuan,

Tianhai Liang,

Xiaoyu Tian,

Maanping Shao,

Feihong Zhang,

Mingyu Ding,

Yang Gao,

Hao Zhao,

Hang Zhao,

Huazhe Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Gu and Xu, Qicheng and Zhang, Haozhe and Ma, Jianhan and He, Long and Bao, Yiming and Ping, Zeyu and Yuan, Zhecheng and Lu, Chenhao and Yuan, Chengbo and Liang, Tianhai and Tian, Xiaoyu and Shao, Maanping and Zhang, Feihong and Ding, Mingyu and Gao, Yang and Zhao, Hao and Zhao, Hang and Xu, Huazhe},
    title     = {{UniDex}: A Robot Foundation Suite for Universal Dexterous Hand Control from Egocentric Human Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1841--1852}
}
Open-world Hand-Object Interaction Video Generation Based on Structure and Contact-aware Representation: Haodong Yan,

Hang Yu,

Zhide Zhong,

Weilin Yuan,

Xin Gong,

Zehang Luo,

Chengxi Heyu,

Junfeng Li,

Wenxuan Song,

Shunbo Zhou,

Haoang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
    author    = {Yan, Haodong and Yu, Hang and Zhong, Zhide and Yuan, Weilin and Gong, Xin and Luo, Zehang and Heyu, Chengxi and Li, Junfeng and Song, Wenxuan and Zhou, Shunbo and Li, Haoang},
    title     = {Open-world Hand-Object Interaction Video Generation Based on Structure and Contact-aware Representation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16031--16041}
}
LaMoGen: Language to Motion Generation Through LLM-Guided Symbolic Inference: Junkun Jiang,

Ho Yin Au,

Jingyu Xiang,

Jie Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
    author    = {Jiang, Junkun and Au, Ho Yin and Xiang, Jingyu and Chen, Jie},
    title     = {{LaMoGen}: Language to Motion Generation Through {LLM-Guided} Symbolic Inference},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {9364--9373}
}
DNF-SR: Dual-Input and Negative-Aware Feature Fine-Tuning for Real-World Image Super-Resolution: Shuhao Han,

Wenjie Liao,

Hayden Vance,

Hang Dong,

Rui Zhang,

Chun-Le Guo,

Chongyi Li; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
    author    = {Han, Shuhao and Liao, Wenjie and Vance, Hayden and Dong, Hang and Zhang, Rui and Guo, Chun-Le and Li, Chongyi},
    title     = {{DNF-SR}: Dual-Input and Negative-Aware Feature Fine-Tuning for Real-World Image Super-Resolution},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38176--38186}
}
Unifying Language-Action Understanding and Generation for Autonomous Driving: Xinyang Wang,

Qian Liu,

Wenjie Ding,

Zhao Yang,

Wei Li,

Chang Liu,

Bailin Li,

Kun Zhan,

Xianpeng Lang,

Wei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Xinyang and Liu, Qian and Ding, Wenjie and Yang, Zhao and Li, Wei and Liu, Chang and Li, Bailin and Zhan, Kun and Lang, Xianpeng and Chen, Wei},
    title     = {Unifying Language-Action Understanding and Generation for Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25193--25203}
}
Tackling Model Bias via Game-theoretic Multi-agent Collaboration Framework for Hateful Meme Classification: Yiwei Wei,

Zhengliang Guo,

Shaozu Yuan,

Chengyin Hu,

Zhiyang Jia,

Jiujiang Guo,

Meng Chen,

Peiying Wang,

Longbiao Wang; [pdf]
[bibtex]
@InProceedings{Wei_2026_CVPR,
    author    = {Wei, Yiwei and Guo, Zhengliang and Yuan, Shaozu and Hu, Chengyin and Jia, Zhiyang and Guo, Jiujiang and Chen, Meng and Wang, Peiying and Wang, Longbiao},
    title     = {Tackling Model Bias via Game-theoretic Multi-agent Collaboration Framework for Hateful Meme Classification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22143--22152}
}
ProFocus: Proactive Perception and Focused Reasoning in Vision-and-Language Navigation: Wei Xue,

Mingcheng Li,

Xuecheng Wu,

Jingqun Tang,

Dingkang Yang,

Lihua Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR,
    author    = {Xue, Wei and Li, Mingcheng and Wu, Xuecheng and Tang, Jingqun and Yang, Dingkang and Zhang, Lihua},
    title     = {{ProFocus}: Proactive Perception and Focused Reasoning in Vision-and-Language Navigation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18129--18139}
}
Taming Generative Diffusion Model for Task-Oriented Infrared Imaging: Tengyu Ma,

Zhilong Dai,

Yubo Diao,

Guanming An,

Long Ma,

Jinyuan Liu,

Risheng Liu; [pdf]
[bibtex]
@InProceedings{Ma_2026_CVPR,
    author    = {Ma, Tengyu and Dai, Zhilong and Diao, Yubo and An, Guanming and Ma, Long and Liu, Jinyuan and Liu, Risheng},
    title     = {Taming Generative Diffusion Model for Task-Oriented Infrared Imaging},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30843--30853}
}
PackUV: Packed Gaussian UV Maps for 4D Volumetric Video: Aashish Rai,

Angela Xing,

Anushka Agarwal,

Xiaoyan Cong,

Zekun Li,

Tao Lu,

Aayush Prakash,

Srinath Sridhar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rai_2026_CVPR,
    author    = {Rai, Aashish and Xing, Angela and Agarwal, Anushka and Cong, Xiaoyan and Li, Zekun and Lu, Tao and Prakash, Aayush and Sridhar, Srinath},
    title     = {{PackUV}: Packed {Gaussian} {UV} Maps for {4D} Volumetric Video},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22581--22593}
}
PRUE: A Practical Recipe for Field Boundary Segmentation at Scale: Gedeon Muhawenayo,

Caleb Robinson,

Subash Khanal,

Zhanpei Fang,

Isaac Corley,

Alexander Wollam,

Tianyi Gao,

Leonard Strnad,

Ryan Avery,

Lyndon Estes,

Ana Tárano,

Nathan Jacobs,

Hannah Kerner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Muhawenayo_2026_CVPR,
    author    = {Muhawenayo, Gedeon and Robinson, Caleb and Khanal, Subash and Fang, Zhanpei and Corley, Isaac and Wollam, Alexander and Gao, Tianyi and Strnad, Leonard and Avery, Ryan and Estes, Lyndon and T{\'a}rano, Ana and Jacobs, Nathan and Kerner, Hannah},
    title     = {{PRUE}: A Practical Recipe for Field Boundary Segmentation at Scale},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {6484--6495}
}
Recovering Physically Plausible Human-Object Interactions from Monocular Videos: Dingbang Huang,

Etienne Vouga,

Qixing Huang,

Georgios Pavlakos; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Dingbang and Vouga, Etienne and Huang, Qixing and Pavlakos, Georgios},
    title     = {Recovering Physically Plausible Human-Object Interactions from Monocular Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {7079--7088}
}
Ego2Web: A Web Agent Benchmark Grounded in Egocentric Videos: Shoubin Yu,

Lei Shu,

Antoine Yang,

Yao Fu,

Srinivas Sunkara,

Maria Wang,

Jindong Chen,

Mohit Bansal,

Boqing Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
    author    = {Yu, Shoubin and Shu, Lei and Yang, Antoine and Fu, Yao and Sunkara, Srinivas and Wang, Maria and Chen, Jindong and Bansal, Mohit and Gong, Boqing},
    title     = {{Ego2Web}: A Web Agent Benchmark Grounded in Egocentric Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25633--25643}
}
Beyond Fixed Formulas: Data-Driven Linear Predictor for Efficient Diffusion Models: Zhirong Shen,

Rui Huang,

Jiacheng Liu,

Chang Zou,

Peiliang Cai,

Shikang Zheng,

Zhengyi Shi,

Liang Feng,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
    author    = {Shen, Zhirong and Huang, Rui and Liu, Jiacheng and Zou, Chang and Cai, Peiliang and Zheng, Shikang and Shi, Zhengyi and Feng, Liang and Zhang, Linfeng},
    title     = {Beyond Fixed Formulas: Data-Driven Linear Predictor for Efficient Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30792--30801}
}
SPARK: Sim-ready Part-level Articulated Reconstruction with VLM Knowledge: Yumeng He,

Ying Jiang,

Jiayin Lu,

Yin Yang,

Chenfanfu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
    author    = {He, Yumeng and Jiang, Ying and Lu, Jiayin and Yang, Yin and Jiang, Chenfanfu},
    title     = {{SPARK}: Sim-ready Part-level Articulated Reconstruction with {VLM} Knowledge},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {7233--7243}
}
Diversity over Uniformity: Rethinking Representation in Generated Image Detection: Qinghui He,

Haifeng Zhang,

Qiao Qin,

Bo Liu,

Xiuli Bi,

Bin Xiao; [pdf]
[bibtex]
@InProceedings{He_2026_CVPR,
    author    = {He, Qinghui and Zhang, Haifeng and Qin, Qiao and Liu, Bo and Bi, Xiuli and Xiao, Bin},
    title     = {Diversity over Uniformity: Rethinking Representation in Generated Image Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40407--40417}
}
LaSM: Layer-wise Scaling Mechanism for Defending Pop-up Attack on GUI Agents: Zihe Yan,

Zhuosheng Zhang,

Jiaping Gui,

Gongshen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
    author    = {Yan, Zihe and Zhang, Zhuosheng and Gui, Jiaping and Liu, Gongshen},
    title     = {{LaSM}: Layer-wise Scaling Mechanism for Defending Pop-up Attack on {GUI} Agents},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {6528--6537}
}
AGiLe: Learning Robust Long-Horizon Manipulation via Affordance-Grounded Bidirectional Latent Planning: Zixuan Chen,

Xiangrong Feng,

Jieqi Shi,

Lin Shao,

Jing Huo,

Yang Gao; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Zixuan and Feng, Xiangrong and Shi, Jieqi and Shao, Lin and Huo, Jing and Gao, Yang},
    title     = {{AGiLe}: Learning Robust Long-Horizon Manipulation via Affordance-Grounded Bidirectional Latent Planning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {6760--6769}
}
MANSION: Multi-floor lANguage-to-3D Scene generatIOn for loNg-horizon tasks: Lirong Che,

Shuo Wen,

Shan Huang,

Chuang Wang,

Yuzhe Yang,

Gregory Dudek,

Xueqian Wang,

Jian Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Che_2026_CVPR,
    author    = {Che, Lirong and Wen, Shuo and Huang, Shan and Wang, Chuang and Yang, Yuzhe and Dudek, Gregory and Wang, Xueqian and Su, Jian},
    title     = {{MANSION}: Multi-floor {lANguage-to-3D} Scene {generatIOn} for {loNg-horizon} tasks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37175--37185}
}
Unlocking Strong Supervision: A Data-Centric Study of General-Purpose Audio Pre-Training Methods: Xuanru Zhou,

Yiwen Shao,

Wei-Cheng Tseng,

Dong Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Xuanru and Shao, Yiwen and Tseng, Wei-Cheng and Yu, Dong},
    title     = {Unlocking Strong Supervision: A Data-Centric Study of General-Purpose Audio Pre-Training Methods},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24780--24791}
}
UniDAC: Universal Metric Depth Estimation for Any Camera: Girish Chandar Ganesan,

Yuliang Guo,

Liu Ren,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ganesan_2026_CVPR,
    author    = {Ganesan, Girish Chandar and Guo, Yuliang and Ren, Liu and Liu, Xiaoming},
    title     = {{UniDAC}: Universal Metric Depth Estimation for Any Camera},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26953--26963}
}
MaxMark: High-Capacity Diffusion-Native Watermarking via Robust and Invertible Latent Embedding: Xuanhang Chang,

Zhonghao Yang,

Cheng Zhuo,

Yu Li; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2026_CVPR,
    author    = {Chang, Xuanhang and Yang, Zhonghao and Zhuo, Cheng and Li, Yu},
    title     = {{MaxMark}: High-Capacity Diffusion-Native Watermarking via Robust and Invertible Latent Embedding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {9394--9403}
}
DrivePTS: A Progressive Learning Framework with Textual and Structural Enhancement for Driving Scene Generation: Zhechao Wang,

Yiming Zeng,

Lufan Ma,

Zeqing Fu,

Chen Bai,

Dongshuo Yin,

Ziyao Lin,

Cheng Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Zhechao and Zeng, Yiming and Ma, Lufan and Fu, Zeqing and Bai, Chen and Yin, Dongshuo and Lin, Ziyao and Lu, Cheng},
    title     = {{DrivePTS}: A Progressive Learning Framework with Textual and Structural Enhancement for Driving Scene Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {3699--3708}
}
Beyond Scanpaths: Graph-Based Gaze Simulation in Dynamic Scenes: Luke Palmer,

Petar Palasek,

Hazem Abdelkawy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Palmer_2026_CVPR,
    author    = {Palmer, Luke and Palasek, Petar and Abdelkawy, Hazem},
    title     = {Beyond Scanpaths: Graph-Based Gaze Simulation in Dynamic Scenes},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15199--15211}
}
See What I Mean: Aligning Vision and Language Representations for Video Fine-grained Object Understanding: Boyuan Sun,

Bo-Wen Yin,

Yuan-Ming Li,

Xihan Wei,

Qibin Hou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Boyuan and Yin, Bo-Wen and Li, Yuan-Ming and Wei, Xihan and Hou, Qibin},
    title     = {See What {I} Mean: Aligning Vision and Language Representations for Video Fine-grained Object Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36814--36827}
}
StreamingTOM: Streaming Token Compression for Efficient Video Understanding: Xueyi Chen,

Keda Tao,

Kele Shao,

Huan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Xueyi and Tao, Keda and Shao, Kele and Wang, Huan},
    title     = {{StreamingTOM}: Streaming Token Compression for Efficient Video Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24675--24685}
}
Local Motion Matters: A Deconstruct-Recompose Paradigm for Reinforcement Learning Pre-training from Videos: Jinwen Wang,

Youfang Lin,

Xiaobo Hu,

Shuo Wang,

Kai Lv; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Jinwen and Lin, Youfang and Hu, Xiaobo and Wang, Shuo and Lv, Kai},
    title     = {Local Motion Matters: A Deconstruct-Recompose Paradigm for Reinforcement Learning Pre-training from Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {9859--9868}
}
VirtueBench: Evaluating Trustworthiness under Uncertainty in Long Video Understanding: Xueqing Yu,

Bohan Li,

Yan Li,

Zhenheng Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
    author    = {Yu, Xueqing and Li, Bohan and Li, Yan and Yang, Zhenheng},
    title     = {{VirtueBench}: Evaluating Trustworthiness under Uncertainty in Long Video Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {40581--40590}
}
Haptic Neural Fields: Bringing Tactile Interactions to 3D Rendered Scenes: Antonio Luigi Stefani,

Niccolò Bisagno,

Nicola Conci,

Eckehard Steinbach,

Francesco De Natale; [pdf] [supp]
[bibtex]
@InProceedings{Stefani_2026_CVPR,
    author    = {Stefani, Antonio Luigi and Bisagno, Niccol{\`o} and Conci, Nicola and Steinbach, Eckehard and De Natale, Francesco},
    title     = {Haptic Neural Fields: Bringing Tactile Interactions to {3D} Rendered Scenes},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16021--16030}
}
HUMORCHAIN: Theory-Guided Multi-Stage Reasoning for Interpretable Multimodal Humor Generation: Jiajun Zhang,

Shijia Luo,

Ruikang Zhang,

Qi Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Jiajun and Luo, Shijia and Zhang, Ruikang and Su, Qi},
    title     = {{HUMORCHAIN}: Theory-Guided Multi-Stage Reasoning for Interpretable Multimodal Humor Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19176--19185}
}
Goal Force: Teaching Video Models To Accomplish Physics-Conditioned Goals: Nate Gillman,

Yinghua Zhou,

Zitian Tang,

Evan Luo,

Arjan Chakravarthy,

Daksh Aggarwal,

Michael Freeman,

Chen Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gillman_2026_CVPR,
    author    = {Gillman, Nate and Zhou, Yinghua and Tang, Zitian and Luo, Evan and Chakravarthy, Arjan and Aggarwal, Daksh and Freeman, Michael and Sun, Chen},
    title     = {Goal Force: Teaching Video Models To Accomplish Physics-Conditioned Goals},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20077--20087}
}
Selectively Extracting and Injecting Visual Attributes into Text-to-Image Models: Seunghwan Choi,

Jooyeol Yun,

Youngdo Lee,

Jaegul Choo; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR,
    author    = {Choi, Seunghwan and Yun, Jooyeol and Lee, Youngdo and Choo, Jaegul},
    title     = {Selectively Extracting and Injecting Visual Attributes into Text-to-Image Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21976--21985}
}
LoFA: Learning to Predict Personalized Prior for Fast Adaptation of Visual Generative Models: Yiming Hao,

Mutian Xu,

Chongjie Ye,

Jie Qin,

Shunlin Lu,

Yipeng Qin,

Xiaoguang Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hao_2026_CVPR,
    author    = {Hao, Yiming and Xu, Mutian and Ye, Chongjie and Qin, Jie and Lu, Shunlin and Qin, Yipeng and Han, Xiaoguang},
    title     = {{LoFA}: Learning to Predict Personalized Prior for Fast Adaptation of Visual Generative Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21986--21996}
}
GeoTikzBridge: Advancing Multimodal Code Generation for Geometric Perception and Reasoning: Jiayin Sun,

Caixia Sun,

Boyu Yang,

Hailin Li,

Xiao Chen,

Yi Zhang,

Errui Ding,

Liang Li,

Chao Deng,

Junlan Feng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Jiayin and Sun, Caixia and Yang, Boyu and Li, Hailin and Chen, Xiao and Zhang, Yi and Ding, Errui and Li, Liang and Deng, Chao and Feng, Junlan},
    title     = {{GeoTikzBridge}: Advancing Multimodal Code Generation for Geometric Perception and Reasoning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {9593--9603}
}
Face-Guided Sentiment Boundary Enhancement for Weakly-Supervised Temporal Sentiment Localization: Cailing Han,

Zhangbin Li,

Jinxing Zhou,

Wei Qian,

Jingjing Hu,

Yanghao Zhou,

Zhangling Duan,

Dan Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Cailing and Li, Zhangbin and Zhou, Jinxing and Qian, Wei and Hu, Jingjing and Zhou, Yanghao and Duan, Zhangling and Guo, Dan},
  title     = {Face-Guided Sentiment Boundary Enhancement for Weakly-Supervised Temporal Sentiment Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24759--24769},
}
VLA Models Are More Generalizable Than You Think: Revisiting Physical and Spatial Modeling: Weiqi Li,

Quande Zhang,

Ruifeng Zhai,

Liang Lin,

Guangrun Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Weiqi and Zhang, Quande and Zhai, Ruifeng and Lin, Liang and Wang, Guangrun},
  title     = {{VLA} Models Are More Generalizable Than You Think: Revisiting Physical and Spatial Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35025--35035},
}
SIMSPINE: A Biomechanics-Aware Simulation Framework for 3D Spine Motion Annotation and Benchmarking: Muhammad Saif Ullah Khan,

Didier Stricker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Khan_2026_CVPR,
  author    = {Khan, Muhammad Saif Ullah and Stricker, Didier},
  title     = {{SIMSPINE}: A Biomechanics-Aware Simulation Framework for {3D} Spine Motion Annotation and Benchmarking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21176--21187},
}
ResCa: Residual Caching for Diffusion Transformers Acceleration: Haipeng Fang,

Yu Li,

Fan Tang,

Yixing Lu,

Juan Cao,

Sheng Tang; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Haipeng and Li, Yu and Tang, Fan and Lu, Yixing and Cao, Juan and Tang, Sheng},
  title     = {{ResCa}: Residual Caching for Diffusion Transformers Acceleration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32957--32966},
}
Omni2Sound: Towards Unified Video-Text-to-Audio Generation: Yusheng Dai,

Zehua Chen,

Yuxuan Jiang,

Qiuhong Ke,

Jianfei Cai,

Jun Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Yusheng and Chen, Zehua and Jiang, Yuxuan and Ke, Qiuhong and Cai, Jianfei and Zhu, Jun},
  title     = {{Omni2Sound}: Towards Unified Video-Text-to-Audio Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1661--1671},
}
Composing Concepts from Images and Videos via Concept-prompt Binding: Xianghao Kong,

Zeyu Zhang,

Yuwei Guo,

Zhuoran Zhao,

Songchun Zhang,

Anyi Rao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Xianghao and Zhang, Zeyu and Guo, Yuwei and Zhao, Zhuoran and Zhang, Songchun and Rao, Anyi},
  title     = {Composing Concepts from Images and Videos via Concept-prompt Binding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14800--14810},
}
RECS4R: Bridging Semantics and Geometry for Referring Remote Sensing Interpretation: Jinming Chai,

Lingling Li,

Licheng Jiao,

Xiaoqiang Lu,

Long Sun,

Xu Liu,

Wenping Ma,

Weibin Li; [pdf] [supp]
[bibtex]
@InProceedings{Chai_2026_CVPR,
  author    = {Chai, Jinming and Li, Lingling and Jiao, Licheng and Lu, Xiaoqiang and Sun, Long and Liu, Xu and Ma, Wenping and Li, Weibin},
  title     = {{RECS4R}: Bridging Semantics and Geometry for Referring Remote Sensing Interpretation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42213--42224},
}
LogCD: Local-to-global Consistency Distillation for Few-step Image Generation: Qingsong Xie,

Zhenyi Liao,

Chen Chen,

Zhijie Deng,

Haonan Lu; [pdf]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Qingsong and Liao, Zhenyi and Chen, Chen and Deng, Zhijie and Lu, Haonan},
  title     = {{LogCD}: Local-to-global Consistency Distillation for Few-step Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8954--8964},
}
Universal Guideline-Driven Image Clustering via a Hybrid LLM Agent: Wenliang Zhong,

Rob Barton,

Lucas Goncalves,

Kushal Kumar,

Feng Jiang,

Hehuan Ma,

Yuzhi Guo,

Vidit Bansal,

Karim Bouyarmane,

Junzhou Huang; [pdf] [supp]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Wenliang and Barton, Rob and Goncalves, Lucas and Kumar, Kushal and Jiang, Feng and Ma, Hehuan and Guo, Yuzhi and Bansal, Vidit and Bouyarmane, Karim and Huang, Junzhou},
  title     = {Universal Guideline-Driven Image Clustering via a Hybrid {LLM} Agent},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33749--33760},
}
EW-DETR: Evolving World Object Detection via Incremental Low-Rank DEtection TRansformer: Munish Monga,

Vishal Chudasama,

Pankaj Wasnik,

C.V. Jawahar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Monga_2026_CVPR,
  author    = {Monga, Munish and Chudasama, Vishal and Wasnik, Pankaj and Jawahar, C. V.},
  title     = {{EW-DETR}: Evolving World Object Detection via Incremental Low-Rank {DEtection} {TRansformer}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11324--11333},
}
GOR-IS: 3D Gaussian Object Removal In the Intrinsic Space: Yonghao Zhao,

Yupeng Gao,

Jian Yang,

Jin Xie,

Beibei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yonghao and Gao, Yupeng and Yang, Jian and Xie, Jin and Wang, Beibei},
  title     = {{GOR-IS}: {3D} {Gaussian} Object Removal In the Intrinsic Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40896--40906},
}
AlcheMinT: Fine-grained Temporal Control for Multi-Reference Consistent Video Generation: Sharath Girish,

Viacheslav Ivanov,

Tsai-Shien Chen,

Hao Chen,

Aliaksandr Siarohin,

Sergey Tulyakov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Girish_2026_CVPR,
  author    = {Girish, Sharath and Ivanov, Viacheslav and Chen, Tsai-Shien and Chen, Hao and Siarohin, Aliaksandr and Tulyakov, Sergey},
  title     = {{AlcheMinT}: Fine-grained Temporal Control for Multi-Reference Consistent Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23227--23237},
}
Understanding the Role of Hallucination in Reinforcement Post-Training of Multimodal Reasoning Models: Gengwei Zhang,

Jie Peng,

Zhen Tan,

Mufan Qiu,

Hossein Nourkhiz Mahjoub,

Vaishnav Tadiparthi,

Kwonjoon Lee,

Yanyong Zhang,

Tianlong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Gengwei and Peng, Jie and Tan, Zhen and Qiu, Mufan and Mahjoub, Hossein Nourkhiz and Tadiparthi, Vaishnav and Lee, Kwonjoon and Zhang, Yanyong and Chen, Tianlong},
  title     = {Understanding the Role of Hallucination in Reinforcement Post-Training of Multimodal Reasoning Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25326--25335},
}
QuietPrune: Query-Guided Early Token Pruning for Vision-Language Models: Tianxiao Gao,

Shanwei Zhao,

Shuo Fang,

Shiai Zhu,

Chenguang Ma; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Tianxiao and Zhao, Shanwei and Fang, Shuo and Zhu, Shiai and Ma, Chenguang},
  title     = {{QuietPrune}: Query-Guided Early Token Pruning for Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3553--3562},
}
b-CLIP: Text-Conditioned Contrastive Learning for Multi-Granular Vision-Language Alignment: Fatimah Zohra,

Chen Zhao,

Hani Itani,

Bernard Ghanem; [pdf] [supp]
[bibtex]
@InProceedings{Zohra_2026_CVPR,
  author    = {Zohra, Fatimah and Zhao, Chen and Itani, Hani and Ghanem, Bernard},
  title     = {{b-CLIP}: Text-Conditioned Contrastive Learning for Multi-Granular Vision-Language Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {680--689},
}
IMU-HOI: A Symbiotic Framework for Coherent Human-Object Interaction and Motion Capture via Contact-Conscious Inertial Fusion: Lizhou Lin,

Songpengcheng Xia,

Zengyuan Lai,

Lan Sun,

Jiarui Yang,

Ling Pei; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Lizhou and Xia, Songpengcheng and Lai, Zengyuan and Sun, Lan and Yang, Jiarui and Pei, Ling},
  title     = {{IMU-HOI}: A Symbiotic Framework for Coherent Human-Object Interaction and Motion Capture via Contact-Conscious Inertial Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42901--42910},
}
From Sketch to Fresco: Efficient Diffusion Transformer with Progressive Resolution: Shikang Zheng,

Guantao Chen,

Landis He,

Jiacheng Liu,

Yuqi Lin,

Chang Zou,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Shikang and Chen, Guantao and He, Landis and Liu, Jiacheng and Lin, Yuqi and Zou, Chang and Zhang, Linfeng},
  title     = {From Sketch to Fresco: Efficient Diffusion Transformer with Progressive Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18714--18723},
}
DSO: Direct Steering Optimization for Bias Mitigation: Lucas Monteiro Paes,

Nivedha Sivakumar,

Yinong Oliver Wang,

Masha Fedzechkina,

Barry-John Theobald,

Luca Zappella,

Nicholas Apostoloff; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Paes_2026_CVPR,
  author    = {Paes, Lucas Monteiro and Sivakumar, Nivedha and Wang, Yinong Oliver and Fedzechkina, Masha and Theobald, Barry-John and Zappella, Luca and Apostoloff, Nicholas},
  title     = {{DSO}: Direct Steering Optimization for Bias Mitigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31714--31724},
}
From Pixel to Precision: Enhancing Handwritten Mathematical Expression Recognition with Image-Level Reward: Ze Liu,

Kai Zhang,

Xianquan Wang,

Shuochen Liu,

Jiaxian Yan,

Yupeng Han,

Qi Liu; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Ze and Zhang, Kai and Wang, Xianquan and Liu, Shuochen and Yan, Jiaxian and Han, Yupeng and Liu, Qi},
  title     = {From Pixel to Precision: Enhancing Handwritten Mathematical Expression Recognition with Image-Level Reward},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25948--25957},
}
Rethinking Model Selection in VLM Through the Lens of Gromov-Wasserstein Distance: Muyang Li,

Yucheng Liu,

Jianbo Ma,

Elliot Osborne,

Bo Han,

Tongliang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Muyang and Liu, Yucheng and Ma, Jianbo and Osborne, Elliot and Han, Bo and Liu, Tongliang},
  title     = {Rethinking Model Selection in {VLM} Through the Lens of {Gromov-Wasserstein} Distance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17237--17247},
}
TSTM: Temporal Segmentation for Task-relevant Mask in Visual Reinforcement Learning Generalization: Weicheng Du,

Wenjia Meng,

Zhengzhe Zhang,

Yilong Yin,

Xiankai Lu; [pdf] [supp]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Weicheng and Meng, Wenjia and Zhang, Zhengzhe and Yin, Yilong and Lu, Xiankai},
  title     = {{TSTM}: Temporal Segmentation for Task-relevant Mask in Visual Reinforcement Learning Generalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27999--28009},
}
Cross-modal Identity Mapping: Minimizing Information Loss in Modality Conversion via Reinforcement Learning: Haonan Jia,

Shichao Dong,

Xin Dong,

Zenghui Sun,

Jin Wang,

Jinsong Lan,

Xiaoyong Zhu,

Bo Zheng,

Kaifu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Haonan and Dong, Shichao and Dong, Xin and Sun, Zenghui and Wang, Jin and Lan, Jinsong and Zhu, Xiaoyong and Zheng, Bo and Zhang, Kaifu},
  title     = {Cross-modal Identity Mapping: Minimizing Information Loss in Modality Conversion via Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {766--777},
}
Graph Attention Prototypical Network for Robust Few-Shot Classification: Tingyun Liu,

Licheng Liu,

Qibin Zhang,

Qiying Feng,

C. L. Philip Chen; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Tingyun and Liu, Licheng and Zhang, Qibin and Feng, Qiying and Chen, C. L. Philip},
  title     = {Graph Attention Prototypical Network for Robust Few-Shot Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33933--33942},
}
Towards Highly Transferable Vision-Language Attack via Semantic-Augmented Dynamic Contrastive Interaction: Yuanbo Li,

Tianyang Xu,

Cong Hu,

Tao Zhou,

Xiaojun Wu,

Josef Kittler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yuanbo and Xu, Tianyang and Hu, Cong and Zhou, Tao and Wu, Xiaojun and Kittler, Josef},
  title     = {Towards Highly Transferable Vision-Language Attack via Semantic-Augmented Dynamic Contrastive Interaction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1493--1502},
}
CC-VQA: Conflict- and Correlation-Aware Method for Mitigating Knowledge Conflict in Knowledge-Based Visual Question Answering: Yuyang Hong,

Jiaqi Gu,

Yujing Lou,

Lubin Fan,

Qi Yang,

Ying Wang,

Kun Ding,

Yue Wu,

Shiming Xiang,

Jieping Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Yuyang and Gu, Jiaqi and Lou, Yujing and Fan, Lubin and Yang, Qi and Wang, Ying and Ding, Kun and Wu, Yue and Xiang, Shiming and Ye, Jieping},
  title     = {{CC-VQA}: Conflict- and Correlation-Aware Method for Mitigating Knowledge Conflict in Knowledge-Based Visual Question Answering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5232--5241},
}
Ghosts in the Point Clouds: De-glaring LiDAR in the Transient Domain: Avery Gump,

Connor Henley,

Sungjin Cheong,

Akarsh Prabhakara,

Mohit Gupta; [pdf] [supp]
[bibtex]
@InProceedings{Gump_2026_CVPR,
  author    = {Gump, Avery and Henley, Connor and Cheong, Sungjin and Prabhakara, Akarsh and Gupta, Mohit},
  title     = {Ghosts in the Point Clouds: De-glaring {LiDAR} in the Transient Domain},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17174--17183},
}
OneOcc: Semantic Occupancy Prediction for Legged Robots with a Single Panoramic Camera: Hao Shi,

Ze Wang,

Shangwei Guo,

Mengfei Duan,

Song Wang,

Teng Chen,

Kailun Yang,

Lin Wang,

Kaiwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Hao and Wang, Ze and Guo, Shangwei and Duan, Mengfei and Wang, Song and Chen, Teng and Yang, Kailun and Wang, Lin and Wang, Kaiwei},
  title     = {{OneOcc}: Semantic Occupancy Prediction for Legged Robots with a Single Panoramic Camera},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14229--14240},
}
ModularAgent: A Task-Aware Modular Framework for Joint Optimization of Multimodal Large Language Models and World Models: Yu-Wei Zhan,

Xin Wang,

Pengzhe Mao,

Tongtong Feng,

Ren Wang,

Wenwu Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Zhan_2026_CVPR,
  author    = {Zhan, Yu-Wei and Wang, Xin and Mao, Pengzhe and Feng, Tongtong and Wang, Ren and Zhu, Wenwu},
  title     = {{ModularAgent}: A Task-Aware Modular Framework for Joint Optimization of Multimodal Large Language Models and World Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8087--8096},
}
Bridging Domains through Subspace-Aware Model Merging: Levy Chaves,

Chao Zhou,

Rebekka Burkholz,

Eduardo Valle,

Sandra Avila; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chaves_2026_CVPR,
  author    = {Chaves, Levy and Zhou, Chao and Burkholz, Rebekka and Valle, Eduardo and Avila, Sandra},
  title     = {Bridging Domains through Subspace-Aware Model Merging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7990--7999},
}
Dynamic Magic: Unleashing Restricted Knowledge for Lifelong Person Re-Identification: Jinjia Peng,

Jican Tan,

Jiazuo Yu,

Zeze Tao,

Huibing Wang; [pdf]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Jinjia and Tan, Jican and Yu, Jiazuo and Tao, Zeze and Wang, Huibing},
  title     = {Dynamic Magic: Unleashing Restricted Knowledge for Lifelong Person Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32278--32287},
}
Dynamic Black-hole Emission Tomography with Physics-informed Neural Fields: Berthy T. Feng,

Andrew A. Chael,

David Bromley,

Aviad Levis,

William T. Freeman,

Katherine L. Bouman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Berthy T. and Chael, Andrew A. and Bromley, David and Levis, Aviad and Freeman, William T. and Bouman, Katherine L.},
  title     = {Dynamic Black-hole Emission Tomography with Physics-informed Neural Fields},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12511--12521},
}
Spatial-Aware VLA Pretraining through Visual-Physical Alignment from Human Videos: Yicheng Feng,

Wanpeng Zhang,

Ye Wang,

Hao Luo,

Haoqi Yuan,

Sipeng Zheng,

Zongqing Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Yicheng and Zhang, Wanpeng and Wang, Ye and Luo, Hao and Yuan, Haoqi and Zheng, Sipeng and Lu, Zongqing},
  title     = {Spatial-Aware {VLA} Pretraining through Visual-Physical Alignment from Human Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {712--723},
}
PnP-CM: Consistency Models as Plug-and-Play Priors for Inverse Problems: Merve Gulle,

Junno Yun,

Yasar Utku Alcalar,

Mehmet Akcakaya; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gulle_2026_CVPR,
  author    = {Gulle, Merve and Yun, Junno and Alcalar, Yasar Utku and Akcakaya, Mehmet},
  title     = {{PnP-CM}: Consistency Models as Plug-and-Play Priors for Inverse Problems},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38464--38474},
}
Remote Sensing Image Super-Resolution for Imbalanced Textures: A Texture-Aware Diffusion Framework: Enzhuo Zhang,

Sijie Zhao,

Dilxat Muhtar,

Zhenshi Li,

Xueliang Zhang,

Pengfeng Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Enzhuo and Zhao, Sijie and Muhtar, Dilxat and Li, Zhenshi and Zhang, Xueliang and Xiao, Pengfeng},
  title     = {Remote Sensing Image Super-Resolution for Imbalanced Textures: A Texture-Aware Diffusion Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38238--38247},
}
Hist2Style: Histogram-Guided Stylization with Bilateral Grids: Dekel Galor,

Adam Pikielny,

Zhoutong Zhang,

Ke Wang,

Laura Waller,

Jiawen Chen,

Ilya Chugunov; [pdf] [supp]
[bibtex]
@InProceedings{Galor_2026_CVPR,
  author    = {Galor, Dekel and Pikielny, Adam and Zhang, Zhoutong and Wang, Ke and Waller, Laura and Chen, Jiawen and Chugunov, Ilya},
  title     = {{Hist2Style}: Histogram-Guided Stylization with Bilateral Grids},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29717--29726},
}
Back to the Feature: Explaining Video Classifiers with Video Counterfactual Explanations: Chao Wang,

Chengan Che,

Xinyue Chen,

Sophia Tsoka,

Luis C. Garcia-Peraza-Herrera; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chao and Che, Chengan and Chen, Xinyue and Tsoka, Sophia and Garcia-Peraza-Herrera, Luis C.},
  title     = {Back to the Feature: Explaining Video Classifiers with Video Counterfactual Explanations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9921--9931},
}
Planning in 8 Tokens: A Compact Discrete Tokenizer for Latent World Model: Dongwon Kim,

Gawon Seo,

Jinsung Lee,

Minsu Cho,

Suha Kwak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Dongwon and Seo, Gawon and Lee, Jinsung and Cho, Minsu and Kwak, Suha},
  title     = {Planning in 8 Tokens: A Compact Discrete Tokenizer for Latent World Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8183--8193},
}
Mamba Learns in Context: Structure-Aware Domain Generalization for Multi-Task Point Cloud Understanding: Jincen Jiang,

Qianyu Zhou,

Yuhang Li,

Kui Su,

Meili Wang,

Jian Chang,

Jian Jun Zhang,

Xuequan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Jincen and Zhou, Qianyu and Li, Yuhang and Su, Kui and Wang, Meili and Chang, Jian and Zhang, Jian Jun and Lu, Xuequan},
  title     = {{Mamba} Learns in Context: Structure-Aware Domain Generalization for Multi-Task Point Cloud Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39098--39110},
}
ALLNet: Multi-task Dense Prediction for Degraded Images: Weiran Wang,

Jialing Wu,

Yaqi Chang,

Gang He,

Li Xu,

Chang Wu,

Yunsong Li; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Weiran and Wu, Jialing and Chang, Yaqi and He, Gang and Xu, Li and Wu, Chang and Li, Yunsong},
  title     = {{ALLNet}: Multi-task Dense Prediction for Degraded Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20421--20432},
}
GMT: Effective Global Framework for Multi-Camera Multi-Target Tracking: Yihao Zhen,

Mingyue Xu,

Qiang Wang,

Baojie Fan,

Jiahua Dong,

Tinghui Zhao,

Huijie Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhen_2026_CVPR,
  author    = {Zhen, Yihao and Xu, Mingyue and Wang, Qiang and Fan, Baojie and Dong, Jiahua and Zhao, Tinghui and Fan, Huijie},
  title     = {{GMT}: Effective Global Framework for Multi-Camera Multi-Target Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28201--28210},
}
Vision-Speech Models: Teaching Speech Models to Converse about Images: Amélie Royer,

Moritz Böhle,

Laurent Mazaré,

Neil Zeghidour,

Alexandre Défossez,

Patrick Pérez; [pdf] [supp]
[bibtex]
@InProceedings{Royer_2026_CVPR,
  author    = {Royer, Am{\'e}lie and B{\"o}hle, Moritz and Mazar{\'e}, Laurent and Zeghidour, Neil and D{\'e}fossez, Alexandre and P{\'e}rez, Patrick},
  title     = {Vision-Speech Models: Teaching Speech Models to Converse about Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1706--1715},
}
Geometric-Photometric Event-based 3D Gaussian Ray Tracing: Kai Kohyama,

Yoshimitsu Aoki,

Guillermo Gallego,

Shintaro Shiba; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kohyama_2026_CVPR,
  author    = {Kohyama, Kai and Aoki, Yoshimitsu and Gallego, Guillermo and Shiba, Shintaro},
  title     = {Geometric-Photometric Event-based {3D} {Gaussian} Ray Tracing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22301--22311},
}
Towards Universal Computational Aberration Correction in Photographic Cameras: A Comprehensive Benchmark Analysis: Xiaolong Qian,

Qi Jiang,

Yao Gao,

Lei Sun,

Zhonghua Yi,

Kailun Yang,

Luc Van Gool,

Kaiwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Xiaolong and Jiang, Qi and Gao, Yao and Sun, Lei and Yi, Zhonghua and Yang, Kailun and Van Gool, Luc and Wang, Kaiwei},
  title     = {Towards Universal Computational Aberration Correction in Photographic Cameras: A Comprehensive Benchmark Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26772--26782},
}
MatchED: Crisp Edge Detection Using End-to-End, Matching-based Supervision: Bedrettin Cetinkaya,

Sinan Kalkan,

Emre Akbas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cetinkaya_2026_CVPR,
  author    = {Cetinkaya, Bedrettin and Kalkan, Sinan and Akbas, Emre},
  title     = {{MatchED}: Crisp Edge Detection Using End-to-End, Matching-based Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42093--42103},
}
Robust Promptable Video Object Segmentation: Sohyun Lee,

Yeho Gwon,

Lukas Hoyer,

Konrad Schindler,

Christos Sakaridis,

Suha Kwak; [pdf] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Sohyun and Gwon, Yeho and Hoyer, Lukas and Schindler, Konrad and Sakaridis, Christos and Kwak, Suha},
  title     = {Robust Promptable Video Object Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39838--39847},
}
Color-Encoded Illumination for High-Speed Volumetric Scene Reconstruction: David Novikov,

Eilon Vaknin,

Narek Tumanyan,

Mark Sheinin; [pdf] [arXiv]
[bibtex]
@InProceedings{Novikov_2026_CVPR,
  author    = {Novikov, David and Vaknin, Eilon and Tumanyan, Narek and Sheinin, Mark},
  title     = {Color-Encoded Illumination for High-Speed Volumetric Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41784--41793},
}
SAG-GNN: Semantic-Aware Guided GNN for Descriptor-Free 2D-3D Matching: Shihua Zhang,

Tianhao Xu,

Zizhuo Li,

Qing Ma,

Jiayi Ma; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Shihua and Xu, Tianhao and Li, Zizhuo and Ma, Qing and Ma, Jiayi},
  title     = {{SAG-GNN}: Semantic-Aware Guided {GNN} for Descriptor-Free {2D-3D} Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31410--31420},
}
Towards Storytelling Animations: Joint Synthesis of Human and Camera Motions: Boyuan Cheng,

Yingjie Xi,

Rui He,

Jinhe Na,

Ying Cao,

Pengjie Wang,

Jian J. Zhang,

Xiaosong Yang; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Boyuan and Xi, Yingjie and He, Rui and Na, Jinhe and Cao, Ying and Wang, Pengjie and Zhang, Jian J. and Yang, Xiaosong},
  title     = {Towards Storytelling Animations: Joint Synthesis of Human and Camera Motions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38376--38386},
}
Evidential Transformation Network: Turning Pretrained Models into Evidential Models for Post-hoc Uncertainty Estimation: Yongchan Chun,

Chanhee Park,

Jeongho Yoon,

Jaehyung Seo,

Heuiseok Lim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chun_2026_CVPR,
  author    = {Chun, Yongchan and Park, Chanhee and Yoon, Jeongho and Seo, Jaehyung and Lim, Heuiseok},
  title     = {Evidential Transformation Network: Turning Pretrained Models into Evidential Models for Post-hoc Uncertainty Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6157--6166},
}
Reasoning-Driven Anomaly Detection and Localization with Image-Level Supervision: Yizhou Jin,

Yuezhu Feng,

Jinjin Zhang,

Peng Wang,

Qingjie Liu,

Yunhong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
  author    = {Jin, Yizhou and Feng, Yuezhu and Zhang, Jinjin and Wang, Peng and Liu, Qingjie and Wang, Yunhong},
  title     = {Reasoning-Driven Anomaly Detection and Localization with Image-Level Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43061--43071},
}
DEVA: Fine-tuning Multimodal Large Language Models for Visual Perception Tasks: Debasmit Das,

Munawar Hayat,

Fatih Porikli; [pdf] [supp]
[bibtex]
@InProceedings{Das_2026_CVPR,
  author    = {Das, Debasmit and Hayat, Munawar and Porikli, Fatih},
  title     = {{DEVA}: Fine-tuning Multimodal Large Language Models for Visual Perception Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39141--39151},
}
SplitFlux: Learning to Decouple Content and Style from a Single Image: Yitong Yang,

Yinglin Wang,

Changshuo Wang,

Yongjun Zhang,

Ziyang Chen,

Shuting He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yitong and Wang, Yinglin and Wang, Changshuo and Zhang, Yongjun and Chen, Ziyang and He, Shuting},
  title     = {{SplitFlux}: Learning to Decouple Content and Style from a Single Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {572--582},
}
TAlignDiff: Automatic Tooth Alignment assisted by Diffusion-based Transformation Learning: Yunbi Liu,

Enqi Tang,

Shiyu Li,

Hui Shuai,

Lei Ma,

Juncheng Li,

Kuai Yu,

Shu Lou,

Yongchu Pan,

Qingshan Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yunbi and Tang, Enqi and Li, Shiyu and Shuai, Hui and Ma, Lei and Li, Juncheng and Yu, Kuai and Lou, Shu and Pan, Yongchu and Liu, Qingshan},
  title     = {{TAlignDiff}: Automatic Tooth Alignment assisted by Diffusion-based Transformation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22784--22793},
}
BA-GS: Bayesian Adaptive Gaussian Splatting for SFM-Free 3D Reconstruction: Zhongjie Ma,

Di Lin,

Xin Wang,

Haotian Dong,

Chong Wang,

Dongdong Wu,

Changqing Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Zhongjie and Lin, Di and Wang, Xin and Dong, Haotian and Wang, Chong and Wu, Dongdong and Zhang, Changqing},
  title     = {{BA-GS}: {Bayesian} Adaptive {Gaussian} Splatting for {SFM}-Free {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26124--26133},
}
MV-Fashion: Towards Enabling Virtual Try-On and Size Estimation with Multi-View Paired Data: Hunor Laczkó,

Libang Jia,

Loc-Phat Truong,

Diego Hernández,

Sergio Escalera,

Jordi Gonzalez,

Meysam Madadi; [pdf] [supp]
[bibtex]
@InProceedings{Laczko_2026_CVPR,
  author    = {Laczk{\'o}, Hunor and Jia, Libang and Truong, Loc-Phat and Hern{\'a}ndez, Diego and Escalera, Sergio and Gonzalez, Jordi and Madadi, Meysam},
  title     = {{MV-Fashion}: Towards Enabling Virtual Try-On and Size Estimation with Multi-View Paired Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42810--42823},
}
CFG-Ctrl: Control-Based Classifier-Free Diffusion Guidance: Hanyang Wang,

Yiyang Liu,

Jiawei Chi,

Fangfu Liu,

Ran Xue,

Yueqi Duan; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Wang_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Hanyang and Liu, Yiyang and Chi, Jiawei and Liu, Fangfu and Xue, Ran and Duan, Yueqi},
  title     = {{CFG-Ctrl}: Control-Based Classifier-Free Diffusion Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11437--11447},
}
VGGT-Segmentor: Geometry-Enhanced Cross-View Segmentation: Yulu Gao,

Bohao Zhang,

Zongheng Tang,

Jitong Liao,

Wenjun Wu,

Si Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Gao_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yulu and Zhang, Bohao and Tang, Zongheng and Liao, Jitong and Wu, Wenjun and Liu, Si},
  title     = {{VGGT-Segmentor}: Geometry-Enhanced Cross-View Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21690--21700},
}
FlowComposer: Composable Flows for Compositional Zero-Shot Learning: Zhenqi He,

Lin Li,

Long Chen; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key He_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{He_2026_CVPR,
  author    = {He, Zhenqi and Li, Lin and Chen, Long},
  title     = {{FlowComposer}: Composable Flows for Compositional Zero-Shot Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12396--12405},
}
UAV-CB: A Complex-Background RGB-T Dataset and Local Frequency Bridge Network for UAV Detection: Shenghui Huang,

Menghao Hu,

Longkun Zou,

Hongyu Chi,

Zekai Li,

Feng Gao,

Fan Yang,

Qingyao Wu,

Ke Chen; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Huang_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Shenghui and Hu, Menghao and Zou, Longkun and Chi, Hongyu and Li, Zekai and Gao, Feng and Yang, Fan and Wu, Qingyao and Chen, Ke},
  title     = {{UAV-CB}: A Complex-Background {RGB-T} Dataset and Local Frequency Bridge Network for {UAV} Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40468--40478},
}
Generalizable Sparse-View 3D Reconstruction from Unconstrained Images: Vinayak Gupta,

Chih-Hao Lin,

Shenlong Wang,

Anand Bhattad,

Jia-Bin Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gupta_2026_CVPR,
  author    = {Gupta, Vinayak and Lin, Chih-Hao and Wang, Shenlong and Bhattad, Anand and Huang, Jia-Bin},
  title     = {Generalizable Sparse-View {3D} Reconstruction from Unconstrained Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33217--33226},
}
BuildAnyPoint: 3D Building Structured Abstraction from Diverse Point Clouds: Tongyan Hua,

Haoran Gong,

Yuan Liu,

Di Wang,

Ying-Cong Chen,

Wufan Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hua_2026_CVPR,
  author    = {Hua, Tongyan and Gong, Haoran and Liu, Yuan and Wang, Di and Chen, Ying-Cong and Zhao, Wufan},
  title     = {{BuildAnyPoint}: {3D} Building Structured Abstraction from Diverse Point Clouds},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17099--17109},
}
XPaintNet: An eXtreme Lightweight Framework for Stereoscopic Conversion without Inpainting Network: Kihwan Yoon,

Juyeon Shin,

Jungheum Kang,

Sijung Kim,

Minyong Jeon; [pdf] [supp]
[bibtex]
@InProceedings{Yoon_2026_CVPR,
  author    = {Yoon, Kihwan and Shin, Juyeon and Kang, Jungheum and Kim, Sijung and Jeon, Minyong},
  title     = {{XPaintNet}: An {eXtreme} Lightweight Framework for Stereoscopic Conversion without Inpainting Network},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5762--5771},
}
MoDES: Accelerating Mixture-of-Experts Multimodal Large Language Models via Dynamic Expert Skipping: Yushi Huang,

Zining Wang,

Zhihang Yuan,

Yifu Ding,

Ruihao Gong,

Jinyang Guo,

Xianglong Liu,

Jun Zhang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Huang_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Yushi and Wang, Zining and Yuan, Zhihang and Ding, Yifu and Gong, Ruihao and Guo, Jinyang and Liu, Xianglong and Zhang, Jun},
  title     = {{MoDES}: Accelerating Mixture-of-Experts Multimodal Large Language Models via Dynamic Expert Skipping},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30205--30215},
}
BarbieGait: An Identity-Consistent Synthetic Human Dataset with Versatile Cloth-Changing for Gait Recognition: Qingyuan Cai,

Saihui Hou,

Xuecai Hu,

Yongzhen Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Qingyuan and Hou, Saihui and Hu, Xuecai and Huang, Yongzhen},
  title     = {{BarbieGait}: An Identity-Consistent Synthetic Human Dataset with Versatile Cloth-Changing for Gait Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28402--28412},
}
Why Does RL Generalize Better Than SFT? A Data-Centric Perspective on VLM Post-Training: Aojun Lu,

Tao Feng,

Hangjie Yuan,

Wei Li,

Yanan Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Aojun and Feng, Tao and Yuan, Hangjie and Li, Wei and Sun, Yanan},
  title     = {Why Does {RL} Generalize Better Than {SFT}? {A} Data-Centric Perspective on {VLM} Post-Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4761--4771},
}
Cross-Hand Latent Representation for Vision-Language-Action Models: Guangqi Jiang,

Yutong Liang,

Jianglong Ye,

Jia-Yang Huang,

Changwei Jing,

Rocky Duan,

Pieter Abbeel,

Xiaolong Wang,

Xueyan Zou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Guangqi and Liang, Yutong and Ye, Jianglong and Huang, Jia-Yang and Jing, Changwei and Duan, Rocky and Abbeel, Pieter and Wang, Xiaolong and Zou, Xueyan},
  title     = {Cross-Hand Latent Representation for Vision-Language-Action Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13496--13507},
}
NoOVD: Novel Category Discovery and Embedding for Open-Vocabulary Object Detection: Yupeng Zhang,

Ruize Han,

Zhiwei Chen,

Wei Feng,

Liang Wan; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhang_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yupeng and Han, Ruize and Chen, Zhiwei and Feng, Wei and Wan, Liang},
  title     = {{NoOVD}: Novel Category Discovery and Embedding for Open-Vocabulary Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6304--6313},
}
VINS-120K: Ultra High-Resolution Image Editing with A Large-Scale Dataset: Zhizhou Chen,

Shanyan Guan,

Zhanxin Gao,

En Ci,

Yanhao Ge,

Wei Li,

Zhenyu Zhang,

Jian Yang,

Ying Tai; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Chen_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhizhou and Guan, Shanyan and Gao, Zhanxin and Ci, En and Ge, Yanhao and Li, Wei and Zhang, Zhenyu and Yang, Jian and Tai, Ying},
  title     = {{VINS-120K}: Ultra High-Resolution Image Editing with A Large-Scale Dataset},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15302--15312},
}
SearchAD: Large-Scale Rare Image Retrieval Dataset for Autonomous Driving: Felix Embacher,

Jonas Uhrig,

Marius Cordts,

Markus Enzweiler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Embacher_2026_CVPR,
  author    = {Embacher, Felix and Uhrig, Jonas and Cordts, Marius and Enzweiler, Markus},
  title     = {{SearchAD}: Large-Scale Rare Image Retrieval Dataset for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33088--33098},
}
Space-Time Forecasting of Dynamic Scenes with Motion-aware Gaussian Grouping: Junmyeong Lee,

Hoseung Choi,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Lee_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Junmyeong and Choi, Hoseung and Cho, Minsu},
  title     = {Space-Time Forecasting of Dynamic Scenes with Motion-aware {Gaussian} Grouping},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41033--41043},
}
POGA: Paraphrased and Oppositional Graph Alignment for Fine-Grained Cross-Modal Retrieval: Junfeng Zhang,

Zhe Xue,

Yuankai Qi,

Junping Du,

Xiangyang Kong,

Yishuo Yan,

Amin Beheshti,

Jian Yang,

Anton van den Hengel,

Ming-Hsuan Yang; [pdf]
[bibtex]
% NOTE(review): citation key Zhang_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Junfeng and Xue, Zhe and Qi, Yuankai and Du, Junping and Kong, Xiangyang and Yan, Yishuo and Beheshti, Amin and Yang, Jian and van den Hengel, Anton and Yang, Ming-Hsuan},
  title     = {{POGA}: Paraphrased and Oppositional Graph Alignment for Fine-Grained Cross-Modal Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2735--2745},
}
PatchAlign3D: Local Feature Alignment for Dense 3D Shape Understanding: Souhail Hadgi,

Bingchen Gong,

Ramana Sundararaman,

Emery Pierson,

Lei Li,

Peter Wonka,

Maks Ovsjanikov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hadgi_2026_CVPR,
  author    = {Hadgi, Souhail and Gong, Bingchen and Sundararaman, Ramana and Pierson, Emery and Li, Lei and Wonka, Peter and Ovsjanikov, Maks},
  title     = {{PatchAlign3D}: Local Feature Alignment for Dense {3D} Shape Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3014--3023},
}
Nonlinear Color Transfer via Learnable Bezier Flows: Junhyoung Lee,

Seongwoon Jo,

JeongHun Park,

Yeonji Ryou,

Jeongha Yang,

Jangho Kim; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Lee_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Junhyoung and Jo, Seongwoon and Park, JeongHun and Ryou, Yeonji and Yang, Jeongha and Kim, Jangho},
  title     = {Nonlinear Color Transfer via Learnable {Bezier} Flows},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41741--41751},
}
DreamOmni2: Multimodal Instruction-based Generation and Editing: Bin Xia,

Bohao Peng,

Yuechen Zhang,

Junjia Huang,

Jiyang Liu,

Jingyao Li,

Haoru Tan,

Sitong Wu,

Chengyao Wang,

Yitong Wang,

Bei Yu,

Jiaya Jia; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Xia_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Bin and Peng, Bohao and Zhang, Yuechen and Huang, Junjia and Liu, Jiyang and Li, Jingyao and Tan, Haoru and Wu, Sitong and Wang, Chengyao and Wang, Yitong and Yu, Bei and Jia, Jiaya},
  title     = {{DreamOmni2}: Multimodal Instruction-based Generation and Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29275--29284},
}
TACO: Task-Aware Contrastive Learning for Joint LiDAR Localization and 3D Object Detection: Leyuan Xing,

Huanjia Zhang,

Dongyu Pan,

Hai Wu,

Qiming Xia,

Kezheng Xiong,

Wen Li,

Chenglu Wen,

Cheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Xing_2026_CVPR,
  author    = {Xing, Leyuan and Zhang, Huanjia and Pan, Dongyu and Wu, Hai and Xia, Qiming and Xiong, Kezheng and Li, Wen and Wen, Chenglu and Wang, Cheng},
  title     = {{TACO}: Task-Aware Contrastive Learning for Joint {LiDAR} Localization and {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9965--9975},
}
SAM2Text: Towards Prompt-Free and Multi-Resolution Video Scene Text Segmentation: Jing-Yao Zhang,

Heng Zhang,

Mingsen Zhang,

Binbin Yang,

Fei Yin; [pdf]
[bibtex]
% NOTE(review): citation key Zhang_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jing-Yao and Zhang, Heng and Zhang, Mingsen and Yang, Binbin and Yin, Fei},
  title     = {{SAM2Text}: Towards Prompt-Free and Multi-Resolution Video Scene Text Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3825--3834},
}
JANUS: A Lightweight Framework for Jailbreaking Text-to-Image Models via Distribution Optimization: Haolun Zheng,

Yu He,

Tailun Chen,

Shuo Shao,

Zhixuan Chu,

Hongbin Zhou,

Lan Tao,

Zhan Qin,

Kui Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Haolun and He, Yu and Chen, Tailun and Shao, Shuo and Chu, Zhixuan and Zhou, Hongbin and Tao, Lan and Qin, Zhan and Ren, Kui},
  title     = {{JANUS}: A Lightweight Framework for Jailbreaking Text-to-Image Models via Distribution Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15719--15729},
}
WiseEdit: Benchmarking Cognition- and Creativity-Informed Image Editing: Kaihang Pan,

Weile Chen,

Haiyi Qiu,

Qifan Yu,

Wendong Bu,

Zehan Wang,

Yun Zhu,

Juncheng Li,

Siliang Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Kaihang and Chen, Weile and Qiu, Haiyi and Yu, Qifan and Bu, Wendong and Wang, Zehan and Zhu, Yun and Li, Juncheng and Tang, Siliang},
  title     = {{WiseEdit}: Benchmarking Cognition- and Creativity-Informed Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37268--37278},
}
UniAVGen: Unified Audio and Video Generation with Asymmetric Cross-Modal Interactions: Guozhen Zhang,

Zixiang Zhou,

Teng Hu,

Ziqiao Peng,

Youliang Zhang,

Yi Chen,

Yuan Zhou,

Qinglin Lu,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhang_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Guozhen and Zhou, Zixiang and Hu, Teng and Peng, Ziqiao and Zhang, Youliang and Chen, Yi and Zhou, Yuan and Lu, Qinglin and Wang, Limin},
  title     = {{UniAVGen}: Unified Audio and Video Generation with Asymmetric Cross-Modal Interactions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1950--1960},
}
When Anonymity Breaks: Identifying Models Behind Text-to-Image Leaderboards: Ali Naseh,

Anshuman Suri,

Yuefeng Peng,

Harsh Chaudhari,

Alina Oprea,

Amir Houmansadr; [pdf] [supp]
[bibtex]
@InProceedings{Naseh_2026_CVPR,
  author    = {Naseh, Ali and Suri, Anshuman and Peng, Yuefeng and Chaudhari, Harsh and Oprea, Alina and Houmansadr, Amir},
  title     = {When Anonymity Breaks: Identifying Models Behind Text-to-Image Leaderboards},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24449--24459},
}
PC-Talk: Precise Facial Animation Control for Audio-Driven Talking Face Generation: Baiqin Wang,

Xiangyu Zhu,

Fan Shen,

Hao Xu,

Zhen Lei; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Wang_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Baiqin and Zhu, Xiangyu and Shen, Fan and Xu, Hao and Lei, Zhen},
  title     = {{PC-Talk}: Precise Facial Animation Control for Audio-Driven Talking Face Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25153--25162},
}
Interpretable Prompts made Edit-Friendly: Token-to-Token Similarity Reduction in dLLMs for Edit-Friendly Hard Prompt Inversion: Naresh Kumar Devulapally,

Shruti Agarwal,

Vishal Asnani,

Vishnu Suresh Lokhande; [pdf] [supp]
[bibtex]
@InProceedings{Devulapally_2026_CVPR,
  author    = {Devulapally, Naresh Kumar and Agarwal, Shruti and Asnani, Vishal and Lokhande, Vishnu Suresh},
  title     = {Interpretable Prompts made Edit-Friendly: Token-to-Token Similarity Reduction in {dLLMs} for Edit-Friendly Hard Prompt Inversion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43290--43299},
}
Egocentric Visibility-Aware Human Pose Estimation: Peng Dai,

Yu Zhang,

Feng Yiqiang,

Zhen Fan,

Yang Zhang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the third author is stored as family name Yiqiang, given name Feng; the two may be swapped -- confirm against the paper.
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Peng and Zhang, Yu and Yiqiang, Feng and Fan, Zhen and Zhang, Yang},
  title     = {Egocentric Visibility-Aware Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7047--7057},
}
CoLoGen: Progressive Learning of Concept-Localization Duality for Unified Image Generation: Yuxin Song,

Yu Lu,

Haoyuan Sun,

Huanjin Yao,

Fanglong Liu,

Yifan Sun,

Haocheng Feng,

Hang Zhou,

Jingdong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Yuxin and Lu, Yu and Sun, Haoyuan and Yao, Huanjin and Liu, Fanglong and Sun, Yifan and Feng, Haocheng and Zhou, Hang and Wang, Jingdong},
  title     = {{CoLoGen}: Progressive Learning of Concept-Localization Duality for Unified Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14724--14734},
}
Topology-aware Feature Propagation for Unsupervised Non-rigid Point Cloud Correspondence: Haozhe Chen,

Rui Li,

Zhengbao Wang,

Xinhao Zhu,

Linjie Li,

Tianyu Xiong,

Xuan Ouyang,

Jiaqi Yang; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Chen_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Haozhe and Li, Rui and Wang, Zhengbao and Zhu, Xinhao and Li, Linjie and Xiong, Tianyu and Ouyang, Xuan and Yang, Jiaqi},
  title     = {Topology-aware Feature Propagation for Unsupervised Non-rigid Point Cloud Correspondence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31389--31399},
}
LifeEval: A Multimodal Benchmark for Assistive AI in Egocentric Daily Life Tasks: Hengjian Gao,

Kaiwei Zhang,

Shibo Wang,

Mingjie Chen,

Qihang Cao,

Xianfeng Wang,

Yucheng Zhu,

Xiongkuo Min,

Wei Sun,

Dandan Zhu,

Guangtao Zhai; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Gao_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Hengjian and Zhang, Kaiwei and Wang, Shibo and Chen, Mingjie and Cao, Qihang and Wang, Xianfeng and Zhu, Yucheng and Min, Xiongkuo and Sun, Wei and Zhu, Dandan and Zhai, Guangtao},
  title     = {{LifeEval}: A Multimodal Benchmark for Assistive {AI} in Egocentric Daily Life Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32892--32902},
}
VesMamba: 3D Pulmonary Vessel Segmentation from CT images via Mamba with Structural Perception and Scale-aware Filtering: Zhipeng Liu,

Guilian Chen,

Zheng Jiang,

Huisi Wu,

Jing Qin; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Liu_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zhipeng and Chen, Guilian and Jiang, Zheng and Wu, Huisi and Qin, Jing},
  title     = {{VesMamba}: {3D} Pulmonary Vessel Segmentation from {CT} images via {Mamba} with Structural Perception and Scale-aware Filtering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1439--1449},
}
FSFSplatter: Geometrically Accurate Reconstruction with Free Sparse-view Images within 2 minutes: Yibin Zhao,

Yihan Pan,

Jun Nan,

Liwei Chen,

Jianjun Yi; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yibin and Pan, Yihan and Nan, Jun and Chen, Liwei and Yi, Jianjun},
  title     = {{FSFSplatter}: Geometrically Accurate Reconstruction with Free Sparse-view Images within 2 minutes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26134--26143},
}
Visual Grounding for Object Questions: Martin Nicolas Everaert,

Xiruo Liu,

Hiroyuki Takeda,

Raja Bala,

Vivek Yadav,

Vidya Narayanan; [pdf] [supp]
[bibtex]
@InProceedings{Everaert_2026_CVPR,
  author    = {Everaert, Martin Nicolas and Liu, Xiruo and Takeda, Hiroyuki and Bala, Raja and Yadav, Vivek and Narayanan, Vidya},
  title     = {Visual Grounding for Object Questions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11966--11975},
}
HySeg: Learning Generative Priors for Structure-Aware Remote Sensing Segmentation: Jie Qiu,

Xin Li,

Fan Yang,

Yan Wang,

Dong Yu,

Changying Wang,

Linwei Dai,

Yongxiang Chen,

Youqin Chen,

Jianzhang Chen; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Jie and Li, Xin and Yang, Fan and Wang, Yan and Yu, Dong and Wang, Changying and Dai, Linwei and Chen, Yongxiang and Chen, Youqin and Chen, Jianzhang},
  title     = {{HySeg}: Learning Generative Priors for Structure-Aware Remote Sensing Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6410--6420},
}
Authorize-on-Demand: Dynamic Authorization with Legality-Aware Intellectual Property Protection for VLMs: Lianyu Wang,

Meng Wang,

Huazhu Fu,

Daoqiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Wang_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Lianyu and Wang, Meng and Fu, Huazhu and Zhang, Daoqiang},
  title     = {Authorize-on-Demand: Dynamic Authorization with Legality-Aware Intellectual Property Protection for {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6621--6630},
}
R4Det: 4D Radar-Camera Fusion for High-Performance 3D Object Detection: Zhongyu Xia,

Yousen Tang,

Yongtao Wang,

Zhifeng Wang,

Weijun Qin; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Xia_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Zhongyu and Tang, Yousen and Wang, Yongtao and Wang, Zhifeng and Qin, Weijun},
  title     = {{R4Det}: {4D} Radar-Camera Fusion for High-Performance {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18766--18775},
}
Decision Boundary-aware Generation for Long-tailed Learning: Jiacheng Yang,

Ruichi Zhang,

Chikai Shang,

Mengke Li,

Xinyi Shang,

Junlong Gao,

Yonggang Zhang,

Yang Lu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Yang_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Jiacheng and Zhang, Ruichi and Shang, Chikai and Li, Mengke and Shang, Xinyi and Gao, Junlong and Zhang, Yonggang and Lu, Yang},
  title     = {Decision Boundary-aware Generation for Long-tailed Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29441--29450},
}
When CLIP Sees More, It Fights Back Harder: Multi-View Guided Adaptive Counterattacks for Test-Time Adversarial Robustness: Sunoh Kim,

Daeho Um; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Sunoh and Um, Daeho},
  title     = {When {CLIP} Sees More, It Fights Back Harder: Multi-View Guided Adaptive Counterattacks for Test-Time Adversarial Robustness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15689--15699},
}
LLaDA-MedV: Exploring Large Language Diffusion Models for Biomedical Image Understanding: Xuanzhao Dong,

Wenhui Zhu,

Xiwen Chen,

Zhipeng Wang,

Peijie Qiu,

Shao Tang,

Xin Li,

Yalin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Xuanzhao and Zhu, Wenhui and Chen, Xiwen and Wang, Zhipeng and Qiu, Peijie and Tang, Shao and Li, Xin and Wang, Yalin},
  title     = {{LLaDA-MedV}: Exploring Large Language Diffusion Models for Biomedical Image Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22773--22783},
}
Attribution-Guided Model Rectification of Unreliable Neural Network Behaviors: Peiyu Yang,

Naveed Akhtar,

Jiantong Jiang,

Ajmal Mian; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Yang_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Peiyu and Akhtar, Naveed and Jiang, Jiantong and Mian, Ajmal},
  title     = {Attribution-Guided Model Rectification of Unreliable Neural Network Behaviors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38979--38989},
}
Agent4FaceForgery: Multi-Agent LLM Framework for Realistic Face Forgery Detection: Yingxin Lai,

Zitong Yu,

Jun Wang,

Linlin Shen,

Yong Xu,

Xiaochun Cao; [pdf] [arXiv]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Yingxin and Yu, Zitong and Wang, Jun and Shen, Linlin and Xu, Yong and Cao, Xiaochun},
  title     = {{Agent4FaceForgery}: Multi-Agent {LLM} Framework for Realistic Face Forgery Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14073--14083},
}
DeCo: Frequency-Decoupled Pixel Diffusion for End-to-End Image Generation: Zehong Ma,

Longhui Wei,

Shuai Wang,

Shiliang Zhang,

Qi Tian; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Ma_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Zehong and Wei, Longhui and Wang, Shuai and Zhang, Shiliang and Tian, Qi},
  title     = {{DeCo}: Frequency-Decoupled Pixel Diffusion for End-to-End Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43600--43610},
}
Fine-Tuning Impairs the Balancedness of Foundation Models in Long-tailed Personalized Federated Learning: Shihao Hou,

Chikai Shang,

Zhiheng Yang,

Jiacheng Yang,

Xinyi Shang,

Junlong Gao,

Yiqun Zhang,

Yang Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hou_2026_CVPR,
  author    = {Hou, Shihao and Shang, Chikai and Yang, Zhiheng and Yang, Jiacheng and Shang, Xinyi and Gao, Junlong and Zhang, Yiqun and Lu, Yang},
  title     = {Fine-Tuning Impairs the Balancedness of Foundation Models in Long-tailed Personalized Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17505--17514},
}
Modeling Cross-vision Synergy for Unified Large Vision Model: Shengqiong Wu,

Lanhu Wu,

Mingyang Bao,

Wenhao Xu,

Hanwang Zhang,

Shuicheng Yan,

Hao Fei,

Tat-Seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Shengqiong and Wu, Lanhu and Bao, Mingyang and Xu, Wenhao and Zhang, Hanwang and Yan, Shuicheng and Fei, Hao and Chua, Tat-Seng},
  title     = {Modeling Cross-vision Synergy for Unified Large Vision Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22941--22952},
}
D2FANet: Enhancing Video Object Detection with Dual-Domain Feature Aggregation Network: Qiang Qi,

Wenqi Shang,

Meifang Wang,

Xiao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Qiang and Shang, Wenqi and Wang, Meifang and Wang, Xiao},
  title     = {{D2FANet}: Enhancing Video Object Detection with Dual-Domain Feature Aggregation Network},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11229--11239},
}
A Geometric Algebra-Informed 3DGS Framework for Wireless Channel Prediction: Jingzhou Shen,

Tianya Zhao,

Xuyu Wang; [pdf]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Jingzhou and Zhao, Tianya and Wang, Xuyu},
  title     = {A Geometric Algebra-Informed {3DGS} Framework for Wireless Channel Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4973--4982},
}
Radar-Guided Polynomial Fitting for Metric Depth Estimation: Patrick Rim,

Hyoungseob Park,

Vadim Ezhov,

Jeffrey Moon,

Alex Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rim_2026_CVPR,
  author    = {Rim, Patrick and Park, Hyoungseob and Ezhov, Vadim and Moon, Jeffrey and Wong, Alex},
  title     = {Radar-Guided Polynomial Fitting for Metric Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26941--26952},
}
QuadSync: Quadrifocal Tensor Synchronization via Tucker Decomposition: Daniel Miao,

Gilad Lerman,

Joe Kileel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Miao_2026_CVPR,
  author    = {Miao, Daniel and Lerman, Gilad and Kileel, Joe},
  title     = {{QuadSync}: Quadrifocal Tensor Synchronization via {Tucker} Decomposition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28786--28795},
}
A Polynomial Chaos Framework for Causal Discovery in Nonlinear Uncertain Systems: Liang Cao; [pdf]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Liang},
  title     = {A Polynomial Chaos Framework for Causal Discovery in Nonlinear Uncertain Systems},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17474--17483},
}
RealVLG-R1: A Large-Scale Real-World Visual-Language Grounding Benchmark for Robotic Perception and Manipulation: Linfei Li,

Lin Zhang,

Ying Shen; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{Li_2026_CVPR,
  author    = {Li, Linfei and Zhang, Lin and Shen, Ying},
  title     = {{RealVLG-R1}: A Large-Scale Real-World Visual-Language Grounding Benchmark for Robotic Perception and Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42396--42407},
}
LoPrune: Efficient Data Pruning for LoRA-Based Fine-Tuning of Vision Transformer: Qiang He,

Yaozong Yang,

Kaibin Wang,

Ziteng Wei,

Feifei Chen,

Caslon Chua,

Yun Yang; [pdf] [supp]
[bibtex]
% NOTE(review): citation key He_2026_CVPR is reused by other entries in this file; rename before citing more than one.
@InProceedings{He_2026_CVPR,
  author    = {He, Qiang and Yang, Yaozong and Wang, Kaibin and Wei, Ziteng and Chen, Feifei and Chua, Caslon and Yang, Yun},
  title     = {{LoPrune}: Efficient Data Pruning for {LoRA}-Based Fine-Tuning of Vision Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5242--5252},
}
Enhancing Out-of-Distribution Detection with Extended Logit Normalization: Yifan Ding,

Xixi Liu,

Jonas Unger,

Gabriel Eilertsen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Yifan and Liu, Xixi and Unger, Jonas and Eilertsen, Gabriel},
  title     = {Enhancing Out-of-Distribution Detection with Extended Logit Normalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24823--24832},
}
FlashLips: 100-FPS Mask-Free Latent Lip-Sync using Reconstruction Instead of Diffusion or GANs: Andreas Zinonos,

Michał Stypułkowski,

Antoni Bigata,

Stavros Petridis,

Maja Pantic,

Nikita Drobyshev; [pdf] [supp]
[bibtex]
@InProceedings{Zinonos_2026_CVPR, author = {Zinonos, Andreas and Stypu{\l}kowski, Micha{\l} and Bigata, Antoni and Petridis, Stavros and Pantic, Maja and Drobyshev, Nikita}, title = {{FlashLips}: 100-{FPS} Mask-Free Latent Lip-Sync using Reconstruction Instead of Diffusion or {GANs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10898--10908} }
Towards Robust Multimodal Large Language Models Against Jailbreak Attacks: Ziyi Yin,

Yuanpu Cao,

Han Liu,

Ting Wang,

Jinghui Chen,

Fenglong Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR, author = {Yin, Ziyi and Cao, Yuanpu and Liu, Han and Wang, Ting and Chen, Jinghui and Ma, Fenglong}, title = {Towards Robust Multimodal Large Language Models Against Jailbreak Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {22847--22856} }
Bridging Domain Expertise and Generalization for Performance Estimation: Shuxuan Li,

Zhilin Zhao,

Quyu Kong,

Wei-Shi Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Shuxuan and Zhao, Zhilin and Kong, Quyu and Zheng, Wei-Shi}, title = {Bridging Domain Expertise and Generalization for Performance Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7967--7977} }
VideoARM: Agentic Reasoning over Hierarchical Memory for Long-Form Video Understanding: Yufei Yin,

Qianke Meng,

Minghao Chen,

Jiajun Ding,

Zhenwei Shao,

Zhou Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR, author = {Yin, Yufei and Meng, Qianke and Chen, Minghao and Ding, Jiajun and Shao, Zhenwei and Yu, Zhou}, title = {{VideoARM}: Agentic Reasoning over Hierarchical Memory for Long-Form Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {24042--24051} }
Test-Time Instance-Specific Parameter Composition: A New Paradigm for Adaptive Generative Modeling: Minh-Tuan Tran,

Xuan-May Le,

Quan Hung Tran,

Mehrtash Harandi,

Dinh Phung,

Trung Le; [pdf] [arXiv]
[bibtex]
@InProceedings{Tran_2026_CVPR, author = {Tran, Minh-Tuan and Le, Xuan-May and Tran, Quan Hung and Harandi, Mehrtash and Phung, Dinh and Le, Trung}, title = {Test-Time Instance-Specific Parameter Composition: A New Paradigm for Adaptive Generative Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37982--37991} }
LA-Pose: Latent Action Pretraining Meets Pose Estimation: Zhengqing Wang,

Saurabh Nair,

Prajwal Chidananda,

Pujith Kachana,

Samuel Li,

Matthew Brown,

Yasutaka Furukawa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Zhengqing and Nair, Saurabh and Chidananda, Prajwal and Kachana, Pujith and Li, Samuel and Brown, Matthew and Furukawa, Yasutaka}, title = {{LA-Pose}: Latent Action Pretraining Meets Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {34460--34469} }
Motion-Aware Animatable Gaussian Avatars Deblurring: Muyao Niu,

Yifan Zhan,

Qingtian Zhu,

Zhuoxiao Li,

Wei Wang,

Zhihang Zhong,

Xiao Sun,

Yinqiang Zheng; [pdf] [arXiv]
[bibtex]
@InProceedings{Niu_2026_CVPR, author = {Niu, Muyao and Zhan, Yifan and Zhu, Qingtian and Li, Zhuoxiao and Wang, Wei and Zhong, Zhihang and Sun, Xiao and Zheng, Yinqiang}, title = {Motion-Aware Animatable {Gaussian} Avatars Deblurring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40140--40151} }
240FPS Stereo Vision from Monocular Mixed Spikes: Yeliduosi Xiaokaiti,

Yakun Chang,

Yang Bai,

Zhaojun Huang,

Peiqi Duan,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Xiaokaiti_2026_CVPR, author = {Xiaokaiti, Yeliduosi and Chang, Yakun and Bai, Yang and Huang, Zhaojun and Duan, Peiqi and Shi, Boxin}, title = {{240FPS} Stereo Vision from Monocular Mixed Spikes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {26688--26697} }
VVS: Accelerating Speculative Decoding for Visual Autoregressive Generation via Partial Verification Skipping: Haotian Dong,

Ye Li,

Rongwei Lu,

Chen Tang,

Shu-Tao Xia,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2026_CVPR, author = {Dong, Haotian and Li, Ye and Lu, Rongwei and Tang, Chen and Xia, Shu-Tao and Wang, Zhi}, title = {{VVS}: Accelerating Speculative Decoding for Visual Autoregressive Generation via Partial Verification Skipping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12173--12182} }
Beyond Top Activations: Efficient and Reliable Crowdsourced Evaluation of Automated Interpretability: Tuomas Oikarinen,

Ge Yan,

Akshay Kulkarni,

Tsui-Wei Weng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Oikarinen_2026_CVPR, author = {Oikarinen, Tuomas and Yan, Ge and Kulkarni, Akshay and Weng, Tsui-Wei}, title = {Beyond Top Activations: Efficient and Reliable Crowdsourced Evaluation of Automated Interpretability}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2885--2894} }
Balanced Dataset Distillation via Modeling Multiple Visual Pattern Distribution: Guanghui Shi,

Xuefeng Liang,

Qixiang Wen; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2026_CVPR, author = {Shi, Guanghui and Liang, Xuefeng and Wen, Qixiang}, title = {Balanced Dataset Distillation via Modeling Multiple Visual Pattern Distribution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {19634--19643} }
NOVA: Sparse Control, Dense Synthesis for Pair-Free Video Editing: Tianlin Pan,

Jiayi Dai,

Chenpu Yuan,

Zhengyao Lv,

Binxin Yang,

Hubery Yin,

Chen Li,

Jing Lyu,

Caifeng Shan,

Chenyang Si; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR, author = {Pan, Tianlin and Dai, Jiayi and Yuan, Chenpu and Lv, Zhengyao and Yang, Binxin and Yin, Hubery and Li, Chen and Lyu, Jing and Shan, Caifeng and Si, Chenyang}, title = {{NOVA}: Sparse Control, Dense Synthesis for Pair-Free Video Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1917--1927} }
X-Part: High Fidelity And Structure Coherent Shape Decomposition And Completion: Xinhao Yan,

Jiachen Xu,

Yang Li,

Changfeng Ma,

Yunhan Yang,

Chunshi Wang,

Zibo Zhao,

Zeqiang Lai,

Yunfei Zhao,

Zhuo Chen,

Chunchao Guo; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2026_CVPR, author = {Yan, Xinhao and Xu, Jiachen and Li, Yang and Ma, Changfeng and Yang, Yunhan and Wang, Chunshi and Zhao, Zibo and Lai, Zeqiang and Zhao, Yunfei and Chen, Zhuo and Guo, Chunchao}, title = {{X-Part}: High Fidelity And Structure Coherent Shape Decomposition And Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {27062--27071} }
FastEventDGS: Deformable Gaussian Splatting for Fast Dynamic Scenes from a Single Event Camera: Zijia Dai,

Nico Messikommer,

Rong Zou,

Nikola Zubic,

Davide Scaramuzza,

Laurent Kneip; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2026_CVPR, author = {Dai, Zijia and Messikommer, Nico and Zou, Rong and Zubic, Nikola and Scaramuzza, Davide and Kneip, Laurent}, title = {{FastEventDGS}: Deformable {Gaussian} Splatting for Fast Dynamic Scenes from a Single Event Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29537--29546} }
FE2E: From Editor to Dense Geometry Estimator: Jiyuan Wang,

Chunyu Lin,

Lei Sun,

Rongying Liu,

Lang Nie,

Mingxing Li,

Kang Liao,

Xiangxiang Chu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Jiyuan and Lin, Chunyu and Sun, Lei and Liu, Rongying and Nie, Lang and Li, Mingxing and Liao, Kang and Chu, Xiangxiang}, title = {{FE2E}: From Editor to Dense Geometry Estimator}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {19844--19853} }
Bridging Fidelity-Reality with Controllable One-Step Diffusion for Image Super-Resolution: Hao Chen,

Junyang Chen,

Jinshan Pan,

Jiangxin Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Hao and Chen, Junyang and Pan, Jinshan and Dong, Jiangxin}, title = {Bridging Fidelity-Reality with Controllable One-Step Diffusion for Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30584--30594} }
Adaptive Confidence Regularization for Multimodal Failure Detection: Moru Liu,

Hao Dong,

Olga Fink,

Mario Trapp; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Moru and Dong, Hao and Fink, Olga and Trapp, Mario}, title = {Adaptive Confidence Regularization for Multimodal Failure Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {15850--15859} }
ACE-Merging: Data-Free Model Merging with Adaptive Covariance Estimation: Bo Xu,

Haotian Wu,

Hehai Lin,

Weiquan Huang,

Beier Zhu,

Yao Shu,

Chengwei Qin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Bo and Wu, Haotian and Lin, Hehai and Huang, Weiquan and Zhu, Beier and Shu, Yao and Qin, Chengwei}, title = {{ACE-Merging}: Data-Free Model Merging with Adaptive Covariance Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29472--29482} }
Hyperbolic Busemann Neural Networks: Ziheng Chen,

Bernhard Schölkopf,

Nicu Sebe; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Ziheng and Sch{\"o}lkopf, Bernhard and Sebe, Nicu}, title = {Hyperbolic {Busemann} Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42029--42038} }
ChimeraLoRA: Multi-Head LoRA-Guided Synthetic Datasets: Hoyoung Kim,

Minwoo Jang,

Jabin Koo,

Sangdoo Yun,

Jungseul Ok; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Hoyoung and Jang, Minwoo and Koo, Jabin and Yun, Sangdoo and Ok, Jungseul}, title = {{ChimeraLoRA}: Multi-Head {LoRA}-Guided Synthetic Datasets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9063--9073} }
TTAPFormer: Robust Arbitrary Point Tracking via Transient Asynchronous Fusion of Frames and Events: Jiaxiong Liu,

Zhen Tan,

Jinpu Zhang,

Yi Zhou,

Hui Shen,

Xieyuanli Chen,

Dewen Hu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Jiaxiong and Tan, Zhen and Zhang, Jinpu and Zhou, Yi and Shen, Hui and Chen, Xieyuanli and Hu, Dewen}, title = {{TTAPFormer}: Robust Arbitrary Point Tracking via Transient Asynchronous Fusion of Frames and Events}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37053--37062} }
CREval: An Automated Interpretable Evaluation for Creative Image Manipulation under Complex Instructions: Chonghuinan Wang,

Zihan Chen,

Yuxiang Wei,

Tianyi Jiang,

Xiaohe Wu,

Fan Li,

Wangmeng Zuo,

Hongxun Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Chonghuinan and Chen, Zihan and Wei, Yuxiang and Jiang, Tianyi and Wu, Xiaohe and Li, Fan and Zuo, Wangmeng and Yao, Hongxun}, title = {{CREval}: An Automated Interpretable Evaluation for Creative Image Manipulation under Complex Instructions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9029--9039} }
FVAR: Next-Focus Prediction for Visual Autoregressive Modeling: Xiaofan Li,

Chenming Wu,

Yanpeng Sun,

Jiaming Zhou,

Delin Qu,

Yansong Qu,

Weihao Bo,

Haibao Yu,

Dingkang Liang; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Xiaofan and Wu, Chenming and Sun, Yanpeng and Zhou, Jiaming and Qu, Delin and Qu, Yansong and Bo, Weihao and Yu, Haibao and Liang, Dingkang}, title = {{FVAR}: Next-Focus Prediction for Visual Autoregressive Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30391--30401} }
OLATverse: A Large-scale Real-world Object Dataset with Precise Lighting Control: Xilong Zhou,

Jianchun Chen,

Pramod Rao,

Timo Teufel,

Linjie Lyu,

Tigran Minasian,

Oleksandr Sotnychenko,

Xiao-Xiao Long,

Marc Habermann,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR, author = {Zhou, Xilong and Chen, Jianchun and Rao, Pramod and Teufel, Timo and Lyu, Linjie and Minasian, Tigran and Sotnychenko, Oleksandr and Long, Xiao-Xiao and Habermann, Marc and Theobalt, Christian}, title = {{OLATverse}: A Large-scale Real-world Object Dataset with Precise Lighting Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28848--28859} }
Landscape-Awareness for Geometric View Diffusion Model: Yan-Ting Chen,

Hao-Wei Chen,

Tsu-Ching Hsiao,

Chun-Yi Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Yan-Ting and Chen, Hao-Wei and Hsiao, Tsu-Ching and Lee, Chun-Yi}, title = {Landscape-Awareness for Geometric View Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38475--38486} }
MedGRPO: Multi-Task Reinforcement Learning for Heterogeneous Medical Video Understanding: Yuhao Su,

Anwesa Choudhuri,

Zhongpai Gao,

Benjamin Planche,

Van Nguyen Nguyen,

Meng Zheng,

Yuhan Shen,

Arun Innanje,

Terrence Chen,

Ehsan Elhamifar,

Ziyan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2026_CVPR, author = {Su, Yuhao and Choudhuri, Anwesa and Gao, Zhongpai and Planche, Benjamin and Nguyen, Van Nguyen and Zheng, Meng and Shen, Yuhan and Innanje, Arun and Chen, Terrence and Elhamifar, Ehsan and Wu, Ziyan}, title = {{MedGRPO}: Multi-Task Reinforcement Learning for Heterogeneous Medical Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2788--2798} }
Modeling Spatiotemporal Neural Frames for High Resolution Brain Dynamic: Wanying Qu,

Jianxiong Gao,

Wei Wang,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2026_CVPR, author = {Qu, Wanying and Gao, Jianxiong and Wang, Wei and Fu, Yanwei}, title = {Modeling Spatiotemporal Neural Frames for High Resolution Brain Dynamic}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6994--7002} }
One Algorithm to Align Them All: Boyi Pang,

Savva Ignatyev,

Vladimir Ippolitov,

Ramil Khafizov,

Yurii Melnik,

Oleg Voynov,

Maksim Nakhodnov,

Aibek Alanov,

Xiaopeng Fan,

Peter Wonka,

Evgeny Burnaev; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pang_2026_CVPR, author = {Pang, Boyi and Ignatyev, Savva and Ippolitov, Vladimir and Khafizov, Ramil and Melnik, Yurii and Voynov, Oleg and Nakhodnov, Maksim and Alanov, Aibek and Fan, Xiaopeng and Wonka, Peter and Burnaev, Evgeny}, title = {One Algorithm to Align Them All}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30446--30456} }
Beyond [CLS] Token: Query-Driven Token-Level Forgery Purification for Generalizable Deepfake Detection: Changshuo Wang,

Jiangming Wang,

Ke-Yue Zhang,

Taiping Yao,

Shouhong Ding,

Shunli Wang,

Ran Yi,

Lizhuang Ma; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Changshuo and Wang, Jiangming and Zhang, Ke-Yue and Yao, Taiping and Ding, Shouhong and Wang, Shunli and Yi, Ran and Ma, Lizhuang}, title = {Beyond {[CLS]} Token: Query-Driven Token-Level Forgery Purification for Generalizable Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42922--42931} }
LoST: Level of Semantics Tokenization for 3D Shapes: Niladri Shekhar Dutt,

Zifan Shi,

Paul Guerrero,

Chun-Hao Paul Huang,

Duygu Ceylan,

Niloy J. Mitra,

Xuelin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dutt_2026_CVPR, author = {Dutt, Niladri Shekhar and Shi, Zifan and Guerrero, Paul and Huang, Chun-Hao Paul and Ceylan, Duygu and Mitra, Niloy J. and Chen, Xuelin}, title = {{LoST}: Level of Semantics Tokenization for {3D} Shapes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {19950--19959} }
Differentiable Stroke Planning with Dual Parameterization for Efficient and High-Fidelity Painting Creation: Jinfan Liu,

Wuze Zhang,

Zhangli Hu,

Zhehan Zhao,

Ye Chen,

Bingbing Ni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Jinfan and Zhang, Wuze and Hu, Zhangli and Zhao, Zhehan and Chen, Ye and Ni, Bingbing}, title = {Differentiable Stroke Planning with Dual Parameterization for Efficient and High-Fidelity Painting Creation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {26721--26730} }
Predict Before You Explore: Predictive Planning with Specialized Memory for Embodied Question Answering: Bowen Yuan,

Sisi You,

Bing-Kun Bao; [pdf] [supp]
[bibtex]
@InProceedings{Yuan_2026_CVPR, author = {Yuan, Bowen and You, Sisi and Bao, Bing-Kun}, title = {Predict Before You Explore: Predictive Planning with Specialized Memory for Embodied Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29610--29619} }
Score2Instruct: Scaling Up Video Quality-Centric Instructions via Automated Dimension Scoring: Qizhi Xie,

Kun Yuan,

Yunpeng Qu,

Jiachao Gong,

Mingda Wu,

Ming Sun,

Chao Zhou,

Jihong Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR, author = {Xie, Qizhi and Yuan, Kun and Qu, Yunpeng and Gong, Jiachao and Wu, Mingda and Sun, Ming and Zhou, Chao and Zhu, Jihong}, title = {{Score2Instruct}: Scaling Up Video Quality-Centric Instructions via Automated Dimension Scoring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11426--11436} }
Pano360: Perspective to Panoramic Vision with Geometric Consistency: Zhengdong Zhu,

Weiyi Xue,

Zuyuan Yang,

Wenlve Zhou,

Zhiheng Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR, author = {Zhu, Zhengdong and Xue, Weiyi and Yang, Zuyuan and Zhou, Wenlve and Zhou, Zhiheng}, title = {{Pano360}: Perspective to Panoramic Vision with Geometric Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7600--7609} }
RenderFlow: Single-Step Neural Rendering via Flow Matching: Shenghao Zhang,

Runtao Liu,

Christopher Schroers,

Yang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shenghao and Liu, Runtao and Schroers, Christopher and Zhang, Yang}, title = {{RenderFlow}: Single-Step Neural Rendering via Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40602--40611} }
RDF-MIG: A Robust Diffusion Framework for Masked Image Generation to Augment Semantic Segmentation and Change Detection: Zian Cao,

Wei Wei,

Qingshan Gao,

Yuanyuan Fu; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2026_CVPR, author = {Cao, Zian and Wei, Wei and Gao, Qingshan and Fu, Yuanyuan}, title = {{RDF-MIG}: A Robust Diffusion Framework for Masked Image Generation to Augment Semantic Segmentation and Change Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35758--35767} }
iLRM: An Iterative Large 3D Reconstruction Model: Gyeongjin Kang,

Seungtae Nam,

Seungkwon Yang,

Xiangyu Sun,

Sameh Khamis,

Abdelrahman Mohamed,

Eunbyung Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR, author = {Kang, Gyeongjin and Nam, Seungtae and Yang, Seungkwon and Sun, Xiangyu and Khamis, Sameh and Mohamed, Abdelrahman and Park, Eunbyung}, title = {{iLRM}: An Iterative Large {3D} Reconstruction Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37332--37342} }
Adaptive Depth Lightweight RGB-T Tracking with Holistic Token Routing: Tian Ding,

Hongtao Yang,

Liangtao Shi,

Jun Li,

Xiantao Hu,

Jian Yang,

Ying Tai; [pdf] [supp]
[bibtex]
@InProceedings{Ding_2026_CVPR, author = {Ding, Tian and Yang, Hongtao and Shi, Liangtao and Li, Jun and Hu, Xiantao and Yang, Jian and Tai, Ying}, title = {Adaptive Depth Lightweight {RGB-T} Tracking with Holistic Token Routing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {20942--20952} }
Towards an Incremental Unified Multimodal Anomaly Detection: Augmenting Multimodal Denoising From an Information Bottleneck Perspective: Kaifang Long,

Lianbo Ma,

Jiaqi Liu,

Liming Liu,

Guoyang Xie; [pdf] [arXiv]
[bibtex]
@InProceedings{Long_2026_CVPR, author = {Long, Kaifang and Ma, Lianbo and Liu, Jiaqi and Liu, Liming and Xie, Guoyang}, title = {Towards an Incremental Unified Multimodal Anomaly Detection: Augmenting Multimodal Denoising From an Information Bottleneck Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {14116--14125} }
NeuroFlow: Toward Unified Visual Encoding and Decoding from Neural Activity: Weijian Mai,

Mu Nan,

Yu Zhu,

Jiahang Cao,

Rui Zhang,

Yuqin Dai,

Chunfeng Song,

Andrew Luo,

Jiamin Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mai_2026_CVPR, author = {Mai, Weijian and Nan, Mu and Zhu, Yu and Cao, Jiahang and Zhang, Rui and Dai, Yuqin and Song, Chunfeng and Luo, Andrew and Wu, Jiamin}, title = {{NeuroFlow}: Toward Unified Visual Encoding and Decoding from Neural Activity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12966--12976} }
Unified Multimodal Models as Auto-Encoders: Zhiyuan Yan,

Kaiqing Lin,

Zongjian Li,

Junyan Ye,

Hui Han,

Haochen Wang,

Zhendong Wang,

Bin Lin,

Hao Li,

Xinyan Xiao,

Jingdong Wang,

Haifeng Wang,

Li Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR, author = {Yan, Zhiyuan and Lin, Kaiqing and Li, Zongjian and Ye, Junyan and Han, Hui and Wang, Haochen and Wang, Zhendong and Lin, Bin and Li, Hao and Xiao, Xinyan and Wang, Jingdong and Wang, Haifeng and Yuan, Li}, title = {Unified Multimodal Models as Auto-Encoders}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41903--41912} }
Test-Time 3D Occupancy Prediction: Fengyi Zhang,

Xiangyu Sun,

Huitong Yang,

Zheng Zhang,

Zi Huang,

Yadan Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Fengyi and Sun, Xiangyu and Yang, Huitong and Zhang, Zheng and Huang, Zi and Luo, Yadan}, title = {Test-Time {3D} Occupancy Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35691--35701} }
PhaseWin Search Framework Enable Efficient Object-Level Interpretation: Zihan Gu,

Ruoyu Chen,

Junchi Zhang,

Yue Hu,

Hua Zhang,

Xiaochun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR, author = {Gu, Zihan and Chen, Ruoyu and Zhang, Junchi and Hu, Yue and Zhang, Hua and Cao, Xiaochun}, title = {{PhaseWin} Search Framework Enable Efficient Object-Level Interpretation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2875--2884} }
DGS: Dual Gradient and Semantic-Shift Guided Low-Rank Adaptation for Class Incremental Learning: Kai Li,

Jiafeng Li,

Lianghua He,

Ying Wen; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Kai and Li, Jiafeng and He, Lianghua and Wen, Ying}, title = {{DGS}: Dual Gradient and Semantic-Shift Guided Low-Rank Adaptation for Class Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32267--32277} }
Robo-SGG: Exploiting Layout-Oriented Normalization and Restitution Can Improve Robust Scene Graph Generation: Changsheng Lv,

Zijian Fu,

Mengshi Qi; [pdf] [arXiv]
[bibtex]
@InProceedings{Lv_2026_CVPR, author = {Lv, Changsheng and Fu, Zijian and Qi, Mengshi}, title = {{Robo-SGG}: Exploiting Layout-Oriented Normalization and Restitution Can Improve Robust Scene Graph Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39282--39292} }
Vision-Language Model Guided Source-Free Domain Adaptation via Optimal Transport: Shuo Han,

Xu Tang,

Jingjing Ma,

Xiangrong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR, author = {Han, Shuo and Tang, Xu and Ma, Jingjing and Zhang, Xiangrong}, title = {Vision-Language Model Guided Source-Free Domain Adaptation via Optimal Transport}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36989--36998} }
Learning to Generate Highly Dynamic Videos using Synthetic Motion Data: Wonjoon Jin,

Jiyun Won,

Janghyeok Han,

Qi Dai,

Chong Luo,

Seung-Hwan Baek,

Sunghyun Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR, author = {Jin, Wonjoon and Won, Jiyun and Han, Janghyeok and Dai, Qi and Luo, Chong and Baek, Seung-Hwan and Cho, Sunghyun}, title = {Learning to Generate Highly Dynamic Videos using Synthetic Motion Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {18331--18341} }
ProgTrack: A Multi-Object Tracking Algorithm with Progressive Matching Strategy: Chenhui Zhang,

Guoqing Dong,

Weijie Peng; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chenhui and Dong, Guoqing and Peng, Weijie}, title = {{ProgTrack}: A Multi-Object Tracking Algorithm with Progressive Matching Strategy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {20902--20911} }
Adapting Lightweight Image-based Counting Models for Video Crowd Counting: Weibo Shu,

Antoni B. Chan; [pdf] [supp]
[bibtex]
@InProceedings{Shu_2026_CVPR, author = {Shu, Weibo and Chan, Antoni B.}, title = {Adapting Lightweight Image-based Counting Models for Video Crowd Counting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35227--35237} }
FedAFD: Multimodal Federated Learning via Adversarial Fusion and Distillation: Min Tan,

Junchao Ma,

Yinfu Feng,

Jiajun Ding,

Wenwen Pan,

Tingting Han,

Qian Zheng,

Zhenzhong Kuang,

Zhou Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2026_CVPR, author = {Tan, Min and Ma, Junchao and Feng, Yinfu and Ding, Jiajun and Pan, Wenwen and Han, Tingting and Zheng, Qian and Kuang, Zhenzhong and Yu, Zhou}, title = {{FedAFD}: Multimodal Federated Learning via Adversarial Fusion and Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3400--3409} }
ConsisVLA-4D: Advancing Spatiotemporal Consistency in Efficient 3D-Perception and 4D-Reasoning for Robotic Manipulation: Wei Li,

Jizhihui Liu,

Li Yixing,

Junwen Tong,

Rui Shao,

Liqiang Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Wei and Liu, Jizhihui and Yixing, Li and Tong, Junwen and Shao, Rui and Nie, Liqiang}, title = {{ConsisVLA-4D}: Advancing Spatiotemporal Consistency in Efficient {3D}-Perception and {4D}-Reasoning for Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6706--6717} }
ActiveAD: Planning-Oriented Active Learning for End-to-End Autonomous Driving: Han Lu,

Xiaosong Jia,

Yichen Xie,

Siyu Sun,

Wenlong Liao,

Xiaokang Yang,

Junchi Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR, author = {Lu, Han and Jia, Xiaosong and Xie, Yichen and Sun, Siyu and Liao, Wenlong and Yang, Xiaokang and Yan, Junchi}, title = {{ActiveAD}: Planning-Oriented Active Learning for End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3656--3666} }
Plug-and-Play Incomplete Multi-View Clustering via Janus-Faced Affinity Learning with Topology Harmonization: Shengju Yu,

Suyuan Liu,

Wenhao Shao,

Siwei Wang,

Ke Liang,

Xihong Yang,

Tiejun Li,

Xinwang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR, author = {Yu, Shengju and Liu, Suyuan and Shao, Wenhao and Wang, Siwei and Liang, Ke and Yang, Xihong and Li, Tiejun and Liu, Xinwang}, title = {Plug-and-Play Incomplete Multi-View Clustering via {Janus}-Faced Affinity Learning with Topology Harmonization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3594--3603} }
SHOW3D: Capturing Scenes of 3D Hands and Objects in the Wild: Patrick Rim,

Kevin Harris,

Braden Copple,

Shangchen Han,

Xu Xie,

Ivan Shugurov,

Sizhe An,

He Wen,

Alex Wong,

Tomas Hodan,

Kun He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rim_2026_CVPR, author = {Rim, Patrick and Harris, Kevin and Copple, Braden and Han, Shangchen and Xie, Xu and Shugurov, Ivan and An, Sizhe and Wen, He and Wong, Alex and Hodan, Tomas and He, Kun}, title = {{SHOW3D}: Capturing Scenes of {3D} Hands and Objects in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7111--7120} }
MicroFM: Physics-guided Flow Matching for Isotropic Microscopy Reconstruction: Xingzu Zhan,

Runmin Jiang,

Vatsal Gupta,

Tanush Swaminathan,

Yanwen Wang,

Genpei Zhang,

Haili Wang,

Min Xu; [pdf] [supp]
[bibtex]
@InProceedings{Zhan_2026_CVPR,
    author    = {Zhan, Xingzu and Jiang, Runmin and Gupta, Vatsal and Swaminathan, Tanush and Wang, Yanwen and Zhang, Genpei and Wang, Haili and Xu, Min},
    title     = {MicroFM: Physics-guided Flow Matching for Isotropic Microscopy Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15639--15648}
}
Cross-Instance Gaussian Splatting Registration via Geometry-Aware Feature-Guided Alignment: Roy Amoyal,

Oren Freifeld,

Chaim Baskin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Amoyal_2026_CVPR,
    author    = {Amoyal, Roy and Freifeld, Oren and Baskin, Chaim},
    title     = {Cross-Instance Gaussian Splatting Registration via Geometry-Aware Feature-Guided Alignment},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {4993--5002}
}
MoReGen: Multi-Agent Motion-Reasoning Engine for Code-based Text-to-Video Synthesis: Xiangyu Bai,

He Liang,

Bishoy Galoaa,

Utsav Nandi,

Shayda Moezzi,

Yuhang He,

Sarah Ostadabbas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR,
    author    = {Bai, Xiangyu and Liang, He and Galoaa, Bishoy and Nandi, Utsav and Moezzi, Shayda and He, Yuhang and Ostadabbas, Sarah},
    title     = {MoReGen: Multi-Agent Motion-Reasoning Engine for Code-based Text-to-Video Synthesis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {7632--7642}
}
DDiT: Dynamic Patch Scheduling for Efficient Diffusion Transformers: Dahye Kim,

Deepti Ghadiyaram,

Raghudeep Gadde; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
    author    = {Kim, Dahye and Ghadiyaram, Deepti and Gadde, Raghudeep},
    title     = {DDiT: Dynamic Patch Scheduling for Efficient Diffusion Transformers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11459--11471}
}
HyperGaussians: High-Dimensional Gaussian Splatting for High-Fidelity Animatable Face Avatars: Gent Serifi,

Marcel C. Buehler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Serifi_2026_CVPR,
    author    = {Serifi, Gent and Buehler, Marcel C.},
    title     = {HyperGaussians: High-Dimensional Gaussian Splatting for High-Fidelity Animatable Face Avatars},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25236--25247}
}
Spot The Ball: A Benchmark for Visual Social Inference: Neha Balamurugan,

Sarah Wu,

Cristobal Eyzaguirre,

Tobias Gerstenberg; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Balamurugan_2026_CVPR,
    author    = {Balamurugan, Neha and Wu, Sarah and Eyzaguirre, Cristobal and Gerstenberg, Tobias},
    title     = {Spot The Ball: A Benchmark for Visual Social Inference},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30919--30928}
}
Bridging the Modality Gap in Compositional Zero-Shot Learning via Sparse Alignment and Unimodal Memory Bank: Yang Zhang,

Zhixiang Chi,

Xudong Yan,

Yang Wang,

Songhe Feng; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Yang and Chi, Zhixiang and Yan, Xudong and Wang, Yang and Feng, Songhe},
    title     = {Bridging the Modality Gap in Compositional Zero-Shot Learning via Sparse Alignment and Unimodal Memory Bank},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {5553--5563}
}
SSM-Aware Token-Efficient VMamba via Adaptive Patch Pruning and Merging for Person Re-Identification: Huiyuan Huang,

Sang Min Yoon; [pdf]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Huiyuan and Yoon, Sang Min},
    title     = {SSM-Aware Token-Efficient VMamba via Adaptive Patch Pruning and Merging for Person Re-Identification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {4363--4372}
}
CARI4D: Category Agnostic 4D Reconstruction of Human-Object Interaction: Xianghui Xie,

Bowen Wen,

Yan Chang,

Hesam Rabeti,

Jiefeng Li,

Ye Yuan,

Gerard Pons-Moll,

Stan Birchfield; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
    author    = {Xie, Xianghui and Wen, Bowen and Chang, Yan and Rabeti, Hesam and Li, Jiefeng and Yuan, Ye and Pons-Moll, Gerard and Birchfield, Stan},
    title     = {CARI4D: Category Agnostic 4D Reconstruction of Human-Object Interaction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14006--14016}
}
SGI: Structured 2D Gaussians for Efficient and Compact Large Image Representation: Zixuan Pan,

Kaiyuan Tang,

Jun Xia,

Yifan Qin,

Lin Gu,

Chaoli Wang,

Jianxu Chen,

Yiyu Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
    author    = {Pan, Zixuan and Tang, Kaiyuan and Xia, Jun and Qin, Yifan and Gu, Lin and Wang, Chaoli and Chen, Jianxu and Shi, Yiyu},
    title     = {SGI: Structured 2D Gaussians for Efficient and Compact Large Image Representation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {12162--12172}
}
FocusUI: Efficient UI Grounding via Position-Preserving Visual Token Selection: Mingyu Ouyang,

Kevin Qinghong Lin,

Mike Zheng Shou,

Hwee Tou Ng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ouyang_2026_CVPR,
    author    = {Ouyang, Mingyu and Lin, Kevin Qinghong and Shou, Mike Zheng and Ng, Hwee Tou},
    title     = {FocusUI: Efficient UI Grounding via Position-Preserving Visual Token Selection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20313--20323}
}
VisMem: Latent Vision Memory Unlocks Potential of Vision-Language Models: Xinlei Yu,

Chengming Xu,

Guibin Zhang,

Zhangquan Chen,

Yudong Zhang,

Yongbo He,

Peng-Tao Jiang,

Jiangning Zhang,

Xiaobin Hu,

Shuicheng Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
    author    = {Yu, Xinlei and Xu, Chengming and Zhang, Guibin and Chen, Zhangquan and Zhang, Yudong and He, Yongbo and Jiang, Peng-Tao and Zhang, Jiangning and Hu, Xiaobin and Yan, Shuicheng},
    title     = {VisMem: Latent Vision Memory Unlocks Potential of Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31544--31555}
}
ExMesh: EXplicit Mesh Reconstruction with Topology Adaptation: Chuanjin Fan,

Lifan Wu,

Wenjie Chang,

Hanzhi Chang,

Wenfei Yang,

Tianzhu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2026_CVPR,
    author    = {Fan, Chuanjin and Wu, Lifan and Chang, Wenjie and Chang, Hanzhi and Yang, Wenfei and Zhang, Tianzhu},
    title     = {ExMesh: EXplicit Mesh Reconstruction with Topology Adaptation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {27136--27145}
}
Scalable Feature Matching via State Space Modeling and Sparse Correlation: Sin Wai Choo,

Bo Li; [pdf]
[bibtex]
@InProceedings{Choo_2026_CVPR,
    author    = {Choo, Sin Wai and Li, Bo},
    title     = {Scalable Feature Matching via State Space Modeling and Sparse Correlation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {6685--6694}
}
Real-World Point Tracking with Verifier-Guided Pseudo-Labeling: Görkay Aydemir,

Fatma Güney,

Weidi Xie; [pdf] [supp]
[bibtex]
@InProceedings{Aydemir_2026_CVPR,
    author    = {Aydemir, G{\"o}rkay and G{\"u}ney, Fatma and Xie, Weidi},
    title     = {Real-World Point Tracking with Verifier-Guided Pseudo-Labeling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13553--13562}
}
Multi-Modal Representation Learning via Semi-Supervised Rate Reduction for Generalized Category Discovery: Wei He,

Xianghan Meng,

Zhiyuan Huang,

Xianbiao Qi,

Rong Xiao,

Chun-Guang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
    author    = {He, Wei and Meng, Xianghan and Huang, Zhiyuan and Qi, Xianbiao and Xiao, Rong and Li, Chun-Guang},
    title     = {Multi-Modal Representation Learning via Semi-Supervised Rate Reduction for Generalized Category Discovery},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39637--39646}
}
ShotDirector: Directorially Controllable Multi-Shot Video Generation with Cinematographic Transitions: Xiaoxue Wu,

Xinyuan Chen,

Yaohui Wang,

Yu Qiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Xiaoxue and Chen, Xinyuan and Wang, Yaohui and Qiao, Yu},
    title     = {ShotDirector: Directorially Controllable Multi-Shot Video Generation with Cinematographic Transitions},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {2079--2089}
}
4DWorldBench: A Comprehensive Evaluation Framework for 3D/4D World Generation Models: Yiting Lu,

Wei Luo,

Peiyan Tu,

Haoran Li,

Hanxin Zhu,

Zihao Yu,

Xingrui Wang,

Xinyi Chen,

Xinge Peng,

Xin Li,

Zhibo Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
    author    = {Lu, Yiting and Luo, Wei and Tu, Peiyan and Li, Haoran and Zhu, Hanxin and Yu, Zihao and Wang, Xingrui and Chen, Xinyi and Peng, Xinge and Li, Xin and Chen, Zhibo},
    title     = {4DWorldBench: A Comprehensive Evaluation Framework for 3D/4D World Generation Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34322--34332}
}
Dual-Prototype-Guided Multi-task Learning for Unsupervised Anomaly Detection and Classification: Qianhao Luo,

Jiajia Mi,

Mingtao Yan,

JingSheng Liu,

ShuYang Pang,

Weiling Li; [pdf]
[bibtex]
@InProceedings{Luo_2026_CVPR,
    author    = {Luo, Qianhao and Mi, Jiajia and Yan, Mingtao and Liu, JingSheng and Pang, ShuYang and Li, Weiling},
    title     = {Dual-Prototype-Guided Multi-task Learning for Unsupervised Anomaly Detection and Classification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14137--14146}
}
TaskIT: Memory-Efficient Fine-Tuning of Multi-LoRA LLMs via Cross-Task Importance Transfer: Cheng Fang,

Zimu Zhou,

Ke Ma,

Bin Guo; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2026_CVPR,
    author    = {Fang, Cheng and Zhou, Zimu and Ma, Ke and Guo, Bin},
    title     = {TaskIT: Memory-Efficient Fine-Tuning of Multi-LoRA LLMs via Cross-Task Importance Transfer},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37010--37021}
}
Towards Open-Vocabulary Industrial Defect Understanding with a Large-Scale Multimodal Dataset: Tsai-Ching Ni,

Cheng-Chi Chen,

Yuan-Fu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2026_CVPR,
    author    = {Ni, Tsai-Ching and Chen, Cheng-Chi and Yang, Yuan-Fu},
    title     = {Towards Open-Vocabulary Industrial Defect Understanding with a Large-Scale Multimodal Dataset},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13059--13068}
}
Style-GRPO: Semantic-Aware Preference Optimization for Image Style Transfer Guided by Reward Modeling: Jianbin Zhao,

Chaoran Feng,

Miao Yu,

Yingtao Li,

Zhenyu Tang,

Wangbo Yu,

Yian Zhao,

Xiaomin Li,

Li Yuan,

Yonghong Tian; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
    author    = {Zhao, Jianbin and Feng, Chaoran and Yu, Miao and Li, Yingtao and Tang, Zhenyu and Yu, Wangbo and Zhao, Yian and Li, Xiaomin and Yuan, Li and Tian, Yonghong},
    title     = {Style-GRPO: Semantic-Aware Preference Optimization for Image Style Transfer Guided by Reward Modeling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {12818--12828}
}
AXG-Reasoner: Error Detection and Explanation in Long Task Videos with Vision-Language Models: Shih-Po Lee,

Ehsan Elhamifar; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, Shih-Po and Elhamifar, Ehsan},
    title     = {AXG-Reasoner: Error Detection and Explanation in Long Task Videos with Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {3421--3431}
}
URICA: A Uniformity Region Affine Identifier Capture Algorithm for Arbitrary Region Retrieval in Pathology Images: Ri Su,

Zhao Chen,

Caleb Chen Cao,

Lei Chen; [pdf] [supp]
[bibtex]
@InProceedings{Su_2026_CVPR,
    author    = {Su, Ri and Chen, Zhao and Cao, Caleb Chen and Chen, Lei},
    title     = {URICA: A Uniformity Region Affine Identifier Capture Algorithm for Arbitrary Region Retrieval in Pathology Images},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32777--32786}
}
Mesh4D: 4D Mesh Reconstruction and Tracking from Monocular Video: Zeren Jiang,

Chuanxia Zheng,

Iro Laina,

Diane Larlus,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
    author    = {Jiang, Zeren and Zheng, Chuanxia and Laina, Iro and Larlus, Diane and Vedaldi, Andrea},
    title     = {Mesh4D: 4D Mesh Reconstruction and Tracking from Monocular Video},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14524--14535}
}
Learning to See Through a Baby's Eyes: Early Visual Diets Enable Robust Visual Intelligence in Humans and Machines: Yusen Cai,

Qing Lin,

Bhargava Satya Nunna,

Mengmi Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2026_CVPR,
    author    = {Cai, Yusen and Lin, Qing and Nunna, Bhargava Satya and Zhang, Mengmi},
    title     = {Learning to See Through a Baby's Eyes: Early Visual Diets Enable Robust Visual Intelligence in Humans and Machines},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {13766--13780}
}
Conflict-Aware Adaptive Cross-Reconstruction for Multimodal Sentiment Analysis: Yan Wang,

Fuyuan Cao,

Xingwang Zhao; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yan and Cao, Fuyuan and Zhao, Xingwang},
    title     = {Conflict-Aware Adaptive Cross-Reconstruction for Multimodal Sentiment Analysis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15880--15889}
}
Neurodynamics-Driven Coupled Neural P Systems for Multi-Focus Image Fusion: Bo Li,

Yunkuo Lei,

Tingting Bao,

Hang Yan,

Yaxian Wang,

Weiping Fu,

Lingling Zhang,

Jun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Bo and Lei, Yunkuo and Bao, Tingting and Yan, Hang and Wang, Yaxian and Fu, Weiping and Zhang, Lingling and Liu, Jun},
    title     = {Neurodynamics-Driven Coupled Neural P Systems for Multi-Focus Image Fusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26541--26550}
}
MV-RoMa: From Pairwise Matching into Multi-View Track Reconstruction: Jongmin Lee,

Seungyeop Kang,

Sungjoo Yoo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, Jongmin and Kang, Seungyeop and Yoo, Sungjoo},
    title     = {MV-RoMa: From Pairwise Matching into Multi-View Track Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {7446--7456}
}
EchoPOSE: 6D Pose Estimation of Sparse Echocardiograms for Left-Ventricular 3D Shape Reconstruction: Lucas Iijima,

Yihao Luo,

Dario Sesia,

Amit Kaura,

Jamil Mayet,

Choon Hwai Yap; [pdf] [supp]
[bibtex]
@InProceedings{Iijima_2026_CVPR,
    author    = {Iijima, Lucas and Luo, Yihao and Sesia, Dario and Kaura, Amit and Mayet, Jamil and Yap, Choon Hwai},
    title     = {EchoPOSE: 6D Pose Estimation of Sparse Echocardiograms for Left-Ventricular 3D Shape Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22753--22762}
}
FedMOP: Achieving Enhanced Privacy and Performance in Federated Learning via Momentum Orthogonal Projection: Yunlong Zhao,

Xiaoheng Deng,

Hongyan Xu,

Zhuohua Qiu,

Xiaowen Hu,

Shan You,

Yi Chen,

Chang Xu,

Xiu Su; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
    author    = {Zhao, Yunlong and Deng, Xiaoheng and Xu, Hongyan and Qiu, Zhuohua and Hu, Xiaowen and You, Shan and Chen, Yi and Xu, Chang and Su, Xiu},
    title     = {FedMOP: Achieving Enhanced Privacy and Performance in Federated Learning via Momentum Orthogonal Projection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39425--39434}
}
FlexAvatar: Learning Complete 3D Head Avatars with Partial Supervision: Tobias Kirschstein,

Simon Giebenhain,

Matthias Nießner; [pdf] [supp]
[bibtex]
@InProceedings{Kirschstein_2026_CVPR,
    author    = {Kirschstein, Tobias and Giebenhain, Simon and Nie{\ss}ner, Matthias},
    title     = {FlexAvatar: Learning Complete 3D Head Avatars with Partial Supervision},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18193--18203}
}
AnyDoc: Enhancing Document Generation via Large-Scale HTML/CSS Data Synthesis and Height-Aware Reinforcement Optimization: Jiawei Lin,

Wanrong Zhu,

Vlad I Morariu,

Christopher Tensmeyer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
    author    = {Lin, Jiawei and Zhu, Wanrong and Morariu, Vlad I. and Tensmeyer, Christopher},
    title     = {AnyDoc: Enhancing Document Generation via Large-Scale HTML/CSS Data Synthesis and Height-Aware Reinforcement Optimization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {626--635}
}
Breaking the 3D Dataset Bottleneck: Fast Scalable Generation of Aligned 3D Assets from Scratch for Category 6D Pose Estimation and Robotic Grasping: Duret Guillaume,

Danylo Mazurak,

Florence Zara,

Jan Peters,

Liming Chen; [pdf] [supp]
[bibtex]
@InProceedings{Guillaume_2026_CVPR,
    author    = {Guillaume, Duret and Mazurak, Danylo and Zara, Florence and Peters, Jan and Chen, Liming},
    title     = {Breaking the 3D Dataset Bottleneck: Fast Scalable Generation of Aligned 3D Assets from Scratch for Category 6D Pose Estimation and Robotic Grasping},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1809--1818}
}
Cross from Left to Right Brain: Adaptive Text Dreamer for Vision-and-Language Navigation: Pingrui Zhang,

Yifei Su,

Pengyuan Wu,

Dong An,

Li Zhang,

Zhigang Wang,

Dong Wang,

Bin Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Pingrui and Su, Yifei and Wu, Pengyuan and An, Dong and Zhang, Li and Wang, Zhigang and Wang, Dong and Zhao, Bin},
    title     = {Cross from Left to Right Brain: Adaptive Text Dreamer for Vision-and-Language Navigation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1006--1019}
}
The Road Less Seen: Segment Exploration for Weakly Supervised Video Anomaly Detection: Anusha Acharya,

Hitesh Sapkota,

Qi Yu,

Xumin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Acharya_2026_CVPR,
    author    = {Acharya, Anusha and Sapkota, Hitesh and Yu, Qi and Liu, Xumin},
    title     = {The Road Less Seen: Segment Exploration for Weakly Supervised Video Anomaly Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14147--14156}
}
SurgCoT: Advancing Spatiotemporal Reasoning in Surgical Videos through a Chain-of-Thought Benchmark: Gui Wang,

YongSong Zhou,

Kaijun Deng,

Wooi Ping Cheah,

Rong Qu,

Jianfeng Ren,

Linlin Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Gui and Zhou, YongSong and Deng, Kaijun and Cheah, Wooi Ping and Qu, Rong and Ren, Jianfeng and Shen, Linlin},
    title     = {SurgCoT: Advancing Spatiotemporal Reasoning in Surgical Videos through a Chain-of-Thought Benchmark},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {17011--17021}
}
Easy3E: Feed-Forward 3D Asset Editing via Rectified Voxel Flow: Shimin Hu,

Yuanyi Wei,

Fei Zha,

Yudong Guo,

Juyong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
    author    = {Hu, Shimin and Wei, Yuanyi and Zha, Fei and Guo, Yudong and Zhang, Juyong},
    title     = {Easy3E: Feed-Forward 3D Asset Editing via Rectified Voxel Flow},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {12730--12740}
}
Scenes as Tokens: Multi-Scale Normal Distributions Transform Tokenizer for General 3D Vision-Language Understanding: Yutao Tang,

Cheng Zhao,

Gaurav Mittal,

Rohith Kukkala,

Rama Chellappa,

Cheng Peng,

Mei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
    author    = {Tang, Yutao and Zhao, Cheng and Mittal, Gaurav and Kukkala, Rohith and Chellappa, Rama and Peng, Cheng and Chen, Mei},
    title     = {Scenes as Tokens: Multi-Scale Normal Distributions Transform Tokenizer for General 3D Vision-Language Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38584--38594}
}
S$^2$AM3D: Scale-controllable Part Segmentation of 3D Point Clouds: Han Su,

Tianyu Huang,

Zichen Wan,

Xiaohe Wu,

Wangmeng Zuo; [pdf] [supp]
[bibtex]
@InProceedings{Su_2026_CVPR,
    author    = {Su, Han and Huang, Tianyu and Wan, Zichen and Wu, Xiaohe and Zuo, Wangmeng},
    title     = {{S$^2$AM3D}: Scale-controllable Part Segmentation of 3D Point Clouds},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14357--14366}
}
Query2Uncertainty: Robust Uncertainty Quantification and Calibration for 3D Object Detection under Distribution Shift: Till Beemelmanns,

Alexey Nekrasov,

Stefan Vilceanu,

Jonas Steinhaus,

Timo Woopen,

Bastian Leibe,

Lutz Eckstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Beemelmanns_2026_CVPR,
    author    = {Beemelmanns, Till and Nekrasov, Alexey and Vilceanu, Stefan and Steinhaus, Jonas and Woopen, Timo and Leibe, Bastian and Eckstein, Lutz},
    title     = {Query2Uncertainty: Robust Uncertainty Quantification and Calibration for 3D Object Detection under Distribution Shift},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {4677--4686}
}
V$^2$-SAM: Marrying SAM2 with Multi-Prompt Experts for Cross-View Object Correspondence: Jiancheng Pan,

Runze Wang,

Tianwen Qian,

Mohammad Mahdi,

Yanwei Fu,

Xiangyang Xue,

Xiaomeng Huang,

Luc Van Gool,

Danda Pani Paudel,

Yuqian Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
    author    = {Pan, Jiancheng and Wang, Runze and Qian, Tianwen and Mahdi, Mohammad and Fu, Yanwei and Xue, Xiangyang and Huang, Xiaomeng and Van Gool, Luc and Paudel, Danda Pani and Fu, Yuqian},
    title     = {{V$^2$-SAM}: Marrying SAM2 with Multi-Prompt Experts for Cross-View Object Correspondence},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16910--16919}
}
PTC-Depth: Pose-Refined Monocular Depth Estimation with Temporal Consistency: Leezy Han,

Seunggyu Kim,

Dongseok Shim,

Hyeonbeom Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
    author    = {Han, Leezy and Kim, Seunggyu and Shim, Dongseok and Lee, Hyeonbeom},
    title     = {PTC-Depth: Pose-Refined Monocular Depth Estimation with Temporal Consistency},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {12617--12627}
}
LumiX: Structured and Coherent Text-to-Intrinsic Generation: Xu Han,

Biao Zhang,

Xiangjun Tang,

Xianzhi Li,

Peter Wonka; [pdf] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
    author    = {Han, Xu and Zhang, Biao and Tang, Xiangjun and Li, Xianzhi and Wonka, Peter},
    title     = {LumiX: Structured and Coherent Text-to-Intrinsic Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21942--21952}
}
Audio-sync Video Instance Editing with Granularity-Aware Mask Refiner: Haojie Zheng,

Shuchen Weng,

Jingqi Liu,

Siqi Yang,

Boxin Shi,

Xinlong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
    author    = {Zheng, Haojie and Weng, Shuchen and Liu, Jingqi and Yang, Siqi and Shi, Boxin and Wang, Xinlong},
    title     = {Audio-sync Video Instance Editing with Granularity-Aware Mask Refiner},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {23150--23160}
}
DiffusionFF: A Diffusion-based Framework for Joint Face Forgery Detection and Fine-Grained Artifact Localization: Siran Peng,

Haoyuan Zhang,

Li Gao,

Tianshuo Zhang,

Xiangyu Zhu,

Bao Li,

Weisong Zhao,

Zhen Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
    author    = {Peng, Siran and Zhang, Haoyuan and Gao, Li and Zhang, Tianshuo and Zhu, Xiangyu and Li, Bao and Zhao, Weisong and Lei, Zhen},
    title     = {DiffusionFF: A Diffusion-based Framework for Joint Face Forgery Detection and Fine-Grained Artifact Localization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14095--14105}
}
ViterbiPlanNet: Injecting Procedural Knowledge via Differentiable Viterbi for Planning in Instructional Videos: Luigi Seminara,

Davide Moltisanti,

Antonino Furnari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seminara_2026_CVPR,
    author    = {Seminara, Luigi and Moltisanti, Davide and Furnari, Antonino},
    title     = {ViterbiPlanNet: Injecting Procedural Knowledge via Differentiable Viterbi for Planning in Instructional Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31240--31249}
}
CycleBEV: Regularizing View Transformation Networks via View Cycle Consistency for Bird's-Eye-View Semantic Segmentation: Jeongbin Hong,

Dooseop Choi,

Taeg-Hyun An,

Kyounghwan An,

Kyoung-Wook Min; [pdf] [supp]
[bibtex]
@InProceedings{Hong_2026_CVPR,
    author    = {Hong, Jeongbin and Choi, Dooseop and An, Taeg-Hyun and An, Kyounghwan and Min, Kyoung-Wook},
    title     = {CycleBEV: Regularizing View Transformation Networks via View Cycle Consistency for Bird's-Eye-View Semantic Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {10986--10995}
}
A Mixed Diet Makes DINO An Omnivorous Vision Encoder: Rishabh Kabra,

Maks Ovsjanikov,

Drew A. Hudson,

Ye Xia,

Skanda Koppula,

Andre Araujo,

Joao Carreira,

Niloy J. Mitra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kabra_2026_CVPR,
    author    = {Kabra, Rishabh and Ovsjanikov, Maks and Hudson, Drew A. and Xia, Ye and Koppula, Skanda and Araujo, Andre and Carreira, Joao and Mitra, Niloy J.},
    title     = {A Mixed Diet Makes DINO An Omnivorous Vision Encoder},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36850--36860}
}
HiDRA: Hierarchical Degradation Representation and Adaptation with Generative Priors for Enhancing Infrared Vision: Zihang Chen,

Zhu Liu,

Changbo Yan,

Jinyuan Liu,

Risheng Liu; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Zihang and Liu, Zhu and Yan, Changbo and Liu, Jinyuan and Liu, Risheng},
    title     = {HiDRA: Hierarchical Degradation Representation and Adaptation with Generative Priors for Enhancing Infrared Vision},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {37434--37444}
}
Unlocking 3D Affordance Segmentation with 2D Semantic Knowledge: Yu Huang,

Zelin Peng,

Changsong Wen,

Xiaokang Yang,

Wei Shen; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Yu and Peng, Zelin and Wen, Changsong and Yang, Xiaokang and Shen, Wei},
    title     = {Unlocking 3D Affordance Segmentation with 2D Semantic Knowledge},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {6399--6409}
}
Omni-AD: A Large-scale and Versatile Benchmark for Industrial Anomaly Detection: Dahu Shi,

Chengshen He,

Shaochen Zhang,

Bo Qian,

Xiaochen Quan,

Wencong Zhang,

Xing Wei; [pdf]
[bibtex]
@InProceedings{Shi_2026_CVPR,
    author    = {Shi, Dahu and He, Chengshen and Zhang, Shaochen and Qian, Bo and Quan, Xiaochen and Zhang, Wencong and Wei, Xing},
    title     = {Omni-AD: A Large-scale and Versatile Benchmark for Industrial Anomaly Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {14157--14166}
}
SCE-Depth: A Spherical Compound Eye Framework for Wide FOV Depth Estimation: Yi Zhu,

Hao Xiong,

Lin Xiao,

Ranfeng Shi,

Qinying Gu,

Leilei Gu; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Yi and Xiong, Hao and Xiao, Lin and Shi, Ranfeng and Gu, Qinying and Gu, Leilei},
    title     = {SCE-Depth: A Spherical Compound Eye Framework for Wide FOV Depth Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26964--26973}
}
GaussianDWM: 3D Gaussian Driving World Model for Unified Scene Understanding and Multi-Modal Generation: Tianchen Deng,

Xuefeng Chen,

Yi Chen,

Qu Chen,

Yuyao Xu,

Lijin Yang,

Le Xu,

Yu Zhang,

Bo Zhang,

Wuxiong Huang,

Hesheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
    author    = {Deng, Tianchen and Chen, Xuefeng and Chen, Yi and Chen, Qu and Xu, Yuyao and Yang, Lijin and Xu, Le and Zhang, Yu and Zhang, Bo and Huang, Wuxiong and Wang, Hesheng},
    title     = {GaussianDWM: 3D Gaussian Driving World Model for Unified Scene Understanding and Multi-Modal Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {10656--10667}
}
Watch and Learn: Learning to Use Computers from Online Videos: Chan Hee Song,

Yiwen Song,

Palash Goyal,

Yu Su,

Oriana Riva,

Hamid Palangi,

Tomas Pfister; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
    author    = {Song, Chan Hee and Song, Yiwen and Goyal, Palash and Su, Yu and Riva, Oriana and Palangi, Hamid and Pfister, Tomas},
    title     = {Watch and Learn: Learning to Use Computers from Online Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {5421-5431}
}
VRR-QA: Visual Relational Reasoning in Videos Beyond Explicit Cues: Sirnam Swetha,

Rohit Gupta,

Parth Parag Kulkarni,

David G Shatwell,

Jeffrey A Chan Santiago,

Nyle Siddiqui,

Joseph Fioresi,

Mubarak Shah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Swetha_2026_CVPR,
    author    = {Swetha, Sirnam and Gupta, Rohit and Kulkarni, Parth Parag and Shatwell, David G and Chan Santiago, Jeffrey A and Siddiqui, Nyle and Fioresi, Joseph and Shah, Mubarak},
    title     = {VRR-QA: Visual Relational Reasoning in Videos Beyond Explicit Cues},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {32840-32849}
}
ESAM++: Efficient Online 3D Perception on the Edge: Qin Liu,

Lavisha Aggarwal,

Saptarashmi Bandyopadhyay,

Vikas Bahirwani,

Marc Niethammer,

Ehsan Adeli,

Andrea Colaco; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Qin and Aggarwal, Lavisha and Bandyopadhyay, Saptarashmi and Bahirwani, Vikas and Niethammer, Marc and Adeli, Ehsan and Colaco, Andrea},
    title     = {ESAM++: Efficient Online 3D Perception on the Edge},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {39021-39030}
}
CHAL: Causal-guided Hierarchical Anomaly-aware Learning for Moving Infrared Small Target Detection: Weiwei Duan,

Luping Ji,

Shipeng Lei,

Sicheng Zhu,

Jianghong Huang,

Mao Ye; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2026_CVPR,
    author    = {Duan, Weiwei and Ji, Luping and Lei, Shipeng and Zhu, Sicheng and Huang, Jianghong and Ye, Mao},
    title     = {CHAL: Causal-guided Hierarchical Anomaly-aware Learning for Moving Infrared Small Target Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {21357-21366}
}
Towards Highly-Constrained Human Motion Generation with Retrieval-Guided Diffusion Noise Optimization: Hanchao Liu,

Fang-Lue Zhang,

Shining Zhang,

Tai-Jiang Mu,

Shi-Min Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Hanchao and Zhang, Fang-Lue and Zhang, Shining and Mu, Tai-Jiang and Hu, Shi-Min},
    title     = {Towards Highly-Constrained Human Motion Generation with Retrieval-Guided Diffusion Noise Optimization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {38291-38301}
}
DeepProtect: Proactive Face-Swapping Defense using Identity Blending and Attribute Distortion: Eungi Lee,

Seung-hyeok Back,

Hyung-Il Kim,

Seok Bong Yoo; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, Eungi and Back, Seung-hyeok and Kim, Hyung-Il and Yoo, Seok Bong},
    title     = {DeepProtect: Proactive Face-Swapping Defense using Identity Blending and Attribute Distortion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {6569-6579}
}
R3-PCQA: Ray-Reprojection-Reinforcement for No-Reference 3D Point Cloud Quality Assessment: Junhyuk Seo,

Sanghyuk Seo,

Dawoon Kim,

Heeseok Oh; [pdf] [supp]
[bibtex]
@InProceedings{Seo_2026_CVPR,
    author    = {Seo, Junhyuk and Seo, Sanghyuk and Kim, Dawoon and Oh, Heeseok},
    title     = {R3-PCQA: Ray-Reprojection-Reinforcement for No-Reference 3D Point Cloud Quality Assessment},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {9996-10005}
}
Learning Anchor in Dual Orthogonal Space for Fast Multi-view Clustering: Yalan Qin,

Hanzhou Wu; [pdf]
[bibtex]
@InProceedings{Qin_2026_CVPR,
    author    = {Qin, Yalan and Wu, Hanzhou},
    title     = {Learning Anchor in Dual Orthogonal Space for Fast Multi-view Clustering},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {1616-1626}
}
History to Future: Evolving Agent with Experience and Thought for Zero-shot Vision-and-Language Navigation: Guangzhao Dai,

Shuo Wang,

Zihan Wang,

Guo-Sen Xie,

Yang Yang,

Jinshan Pan,

Qianru Sun,

Xiangbo Shu; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2026_CVPR,
    author    = {Dai, Guangzhao and Wang, Shuo and Wang, Zihan and Xie, Guo-Sen and Yang, Yang and Pan, Jinshan and Sun, Qianru and Shu, Xiangbo},
    title     = {History to Future: Evolving Agent with Experience and Thought for Zero-shot Vision-and-Language Navigation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {15177-15187}
}
CTCal: Rethinking Text-to-Image Diffusion Models via Cross-Timestep Self-Calibration: Xiefan Guo,

Xinzhu Ma,

Haiyu Zhang,

Di Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
    author    = {Guo, Xiefan and Ma, Xinzhu and Zhang, Haiyu and Huang, Di},
    title     = {CTCal: Rethinking Text-to-Image Diffusion Models via Cross-Timestep Self-Calibration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {43558-43567}
}
Learning Spatial-Temporal Consistency for 3D Semantic Scene Completion: Yujie Xue,

Meng Wang,

Ruihui Li,

Fan Wu,

Zhizhong Liu,

Zhuo Tang,

Kenli Li; [pdf] [supp]
[bibtex]
@InProceedings{Xue_2026_CVPR,
    author    = {Xue, Yujie and Wang, Meng and Li, Ruihui and Wu, Fan and Liu, Zhizhong and Tang, Zhuo and Li, Kenli},
    title     = {Learning Spatial-Temporal Consistency for 3D Semantic Scene Completion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {28567-28577}
}
StereoWorld: Geometry-Aware Monocular-to-Stereo Video Generation: Ke Xing,

Longfei Li,

Yuyang Yin,

Hanwen Liang,

Guixun Luo,

Chen Fang,

Jue Wang,

Konstantinos N. Plataniotis,

Xiaojie Jin,

Yao Zhao,

Yunchao Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2026_CVPR,
    author    = {Xing, Ke and Li, Longfei and Yin, Yuyang and Liang, Hanwen and Luo, Guixun and Fang, Chen and Wang, Jue and Plataniotis, Konstantinos N. and Jin, Xiaojie and Zhao, Yao and Wei, Yunchao},
    title     = {StereoWorld: Geometry-Aware Monocular-to-Stereo Video Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {40245-40255}
}
Low-Rank Test-Time Training for Pre-Trained Point Cloud Models: Ouyangzi Ye,

Feifei Shao,

Kexin Li,

Yawei Luo,

Zikai Song,

Ping Liu,

Fengda Zhang,

Hongwei Wang,

Jun Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR,
    author    = {Ye, Ouyangzi and Shao, Feifei and Li, Kexin and Luo, Yawei and Song, Zikai and Liu, Ping and Zhang, Fengda and Wang, Hongwei and Xiao, Jun},
    title     = {Low-Rank Test-Time Training for Pre-Trained Point Cloud Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {31472-31481}
}
DriveMoE: Mixture-of-Experts for Vision-Language-Action Model in End-to-End Autonomous Driving: Zhenjie Yang,

Yilin Chai,

Xiaosong Jia,

Qifeng Li,

Yuqian Shao,

Xuekai Zhu,

Haisheng Su,

Junchi Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Zhenjie and Chai, Yilin and Jia, Xiaosong and Li, Qifeng and Shao, Yuqian and Zhu, Xuekai and Su, Haisheng and Yan, Junchi},
    title     = {DriveMoE: Mixture-of-Experts for Vision-Language-Action Model in End-to-End Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {10678-10688}
}
AgentDet: A Shared-Blackboard Multi-Agent Framework for Zero-/Few-Shot Object Detection: Haolin Li,

Yaohua Wang,

Ze Yan,

Lijie Wen,

Biqing Huang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Haolin and Wang, Yaohua and Yan, Ze and Wen, Lijie and Huang, Biqing},
    title     = {AgentDet: A Shared-Blackboard Multi-Agent Framework for Zero-/Few-Shot Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {41626-41635}
}
Pixel2Phys: Distilling Governing Laws from Visual Dynamics: Ruikun Li,

Jun Yao,

Yingfan Hua,

Shixiang Tang,

Biqing Qi,

Bin Liu,

Wanli Ouyang,

Yan Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Ruikun and Yao, Jun and Hua, Yingfan and Tang, Shixiang and Qi, Biqing and Liu, Bin and Ouyang, Wanli and Lu, Yan},
    title     = {Pixel2Phys: Distilling Governing Laws from Visual Dynamics},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {41426-41435}
}
Deformable Gaussian Occupancy: Decoupling Rigid and Nonrigid Motion with Factorized Distillation: Yang Gao,

Wuyang Li,

Po-Chien Luan,

Alexandre Alahi; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
    author    = {Gao, Yang and Li, Wuyang and Luan, Po-Chien and Alahi, Alexandre},
    title     = {Deformable Gaussian Occupancy: Decoupling Rigid and Nonrigid Motion with Factorized Distillation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {28588-28598}
}
Exposing Functional Fusion: A New Class of Strategic Backdoor in Dynamic Prompt Architectures: Zeyao Liu,

Zhendong Zhao,

Xiaojun Chen,

Xin Zhao,

Yuexin Xuan,

Xiaoshuang Ji; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Zeyao and Zhao, Zhendong and Chen, Xiaojun and Zhao, Xin and Xuan, Yuexin and Ji, Xiaoshuang},
    title     = {Exposing Functional Fusion: A New Class of Strategic Backdoor in Dynamic Prompt Architectures},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {13376-13385}
}
Are We Ready for RL in Text-to-3D Generation? A Progressive Investigation: Yiwen Tang,

Zoey Guo,

Kaixin Zhu,

Ray Zhang,

Qizhi Chen,

Dongzhi Jiang,

Junli Liu,

Bohan Zeng,

Haoming Song,

Delin Qu,

Tianyi Bai,

Dan Xu,

Wentao Zhang,

Bin Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
    author    = {Tang, Yiwen and Guo, Zoey and Zhu, Kaixin and Zhang, Ray and Chen, Qizhi and Jiang, Dongzhi and Liu, Junli and Zeng, Bohan and Song, Haoming and Qu, Delin and Bai, Tianyi and Xu, Dan and Zhang, Wentao and Zhao, Bin},
    title     = {Are We Ready for RL in Text-to-3D Generation? A Progressive Investigation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {3197-3207}
}
Modeling the Brain's Grammar: ROI-Guided fMRI Pretraining for Transferable and Interpretable Vision Decoding: Yulong Liu,

Hua Xu,

Yiyang Cai,

Chunyang Jiang,

Sirui Han,

Yike Guo; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Yulong and Xu, Hua and Cai, Yiyang and Jiang, Chunyang and Han, Sirui and Guo, Yike},
    title     = {Modeling the Brain's Grammar: ROI-Guided fMRI Pretraining for Transferable and Interpretable Vision Decoding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {6900-6909}
}
Echoes Over Time: Unlocking Length Generalization in Video-to-Audio Generation Models: Christian Simon,

Masato Ishii,

Wei-Yao Wang,

Koichi Saito,

Akio Hayakawa,

Dongseok Shim,

Zhi Zhong,

Shuyang Cui,

Takashi Shibuya,

Shusuke Takahashi,

Yuki Mitsufuji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Simon_2026_CVPR,
    author    = {Simon, Christian and Ishii, Masato and Wang, Wei-Yao and Saito, Koichi and Hayakawa, Akio and Shim, Dongseok and Zhong, Zhi and Cui, Shuyang and Shibuya, Takashi and Takahashi, Shusuke and Mitsufuji, Yuki},
    title     = {Echoes Over Time: Unlocking Length Generalization in Video-to-Audio Generation Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {15840-15849}
}
Vision Foundation Models Can Be Good Tokenizers for Latent Diffusion Models: Tianci Bi,

Xiaoyi Zhang,

Yan Lu,

Nanning Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bi_2026_CVPR,
    author    = {Bi, Tianci and Zhang, Xiaoyi and Lu, Yan and Zheng, Nanning},
    title     = {Vision Foundation Models Can Be Good Tokenizers for Latent Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {43310-43319}
}
Flowception: Temporally Expansive Flow Matching for Video Generation: Tariq Berrada Ifriqi,

John Nguyen,

Karteek Alahari,

Jakob Verbeek,

Ricky T. Q. Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ifriqi_2026_CVPR,
    author    = {Ifriqi, Tariq Berrada and Nguyen, John and Alahari, Karteek and Verbeek, Jakob and Chen, Ricky T. Q.},
    title     = {Flowception: Temporally Expansive Flow Matching for Video Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {16185-16195}
}
OSPO: Object-Centric Self-Improving Preference Optimization for Text-to-Image Generation: Yoonjin Oh,

Yongjin Kim,

Hyomin Kim,

Donghwan Chi,

Sungwoong Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Oh_2026_CVPR,
    author    = {Oh, Yoonjin and Kim, Yongjin and Kim, Hyomin and Chi, Donghwan and Kim, Sungwoong},
    title     = {OSPO: Object-Centric Self-Improving Preference Optimization for Text-to-Image Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {7620-7631}
}
EVLF: Early Vision-Language Fusion for Generative Dataset Distillation: Wenqi Cai,

Yawen Zou,

Guang Li,

Chunzhi Gu,

Chao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
    author    = {Cai, Wenqi and Zou, Yawen and Li, Guang and Gu, Chunzhi and Zhang, Chao},
    title     = {EVLF: Early Vision-Language Fusion for Generative Dataset Distillation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {33953-33962}
}
UFVideo: Towards Unified Fine-Grained Video Cooperative Understanding with Large Language Models: Hewen Pan,

Cong Wei,

Dashuang Liang,

Zepeng Huang,

Pengfei Gao,

Ziqi Zhou,

Lulu Xue,

Pengfei Yan,

Xiaoming Wei,

Minghui Li,

Shengshan Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
    author    = {Pan, Hewen and Wei, Cong and Liang, Dashuang and Huang, Zepeng and Gao, Pengfei and Zhou, Ziqi and Xue, Lulu and Yan, Pengfei and Wei, Xiaoming and Li, Minghui and Hu, Shengshan},
    title     = {UFVideo: Towards Unified Fine-Grained Video Cooperative Understanding with Large Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {31833-31844}
}
DeRVOS: Decoupling Consistent Trajectory Generation and Multimodal Understanding for Referring Video Object Segmentation: Wenxuan Cheng,

Ming Dai,

Huimin Lu,

Wankou Yang; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
    author    = {Cheng, Wenxuan and Dai, Ming and Lu, Huimin and Yang, Wankou},
    title     = {DeRVOS: Decoupling Consistent Trajectory Generation and Multimodal Understanding for Referring Video Object Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {24651-24662}
}
Which Concepts to Forget and How to Refuse? Decomposing Concepts for Continual Unlearning in Large Vision-Language Models: Hyundong Jin,

Dongyoon Han,

Eunwoo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
    author    = {Jin, Hyundong and Han, Dongyoon and Kim, Eunwoo},
    title     = {Which Concepts to Forget and How to Refuse? Decomposing Concepts for Continual Unlearning in Large Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {32288-32298}
}
Divide, then Ground: Adapting Frame Selection to Query Types for Long-Form Video Understanding: Jialuo Li,

Bin Li,

Jiahao Li,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Jialuo and Li, Bin and Li, Jiahao and Lu, Yan},
    title     = {Divide, then Ground: Adapting Frame Selection to Query Types for Long-Form Video Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {11369-11380}
}
SR3R: Rethinking Super-Resolution 3D Reconstruction With Feed-Forward Gaussian Splatting: Xiang Feng,

Xiangbo Wang,

Tieshi Zhong,

Chengkai Wang,

Yiting Zhao,

Tianxiang Xu,

Zhenzhong Kuang,

Feiwei Qin,

Xuefei Yin,

Yanming Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
    author    = {Feng, Xiang and Wang, Xiangbo and Zhong, Tieshi and Wang, Chengkai and Zhao, Yiting and Xu, Tianxiang and Kuang, Zhenzhong and Qin, Feiwei and Yin, Xuefei and Zhu, Yanming},
    title     = {SR3R: Rethinking Super-Resolution 3D Reconstruction With Feed-Forward Gaussian Splatting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {33384-33393}
}
Diffusion Guided Chain-of-Vision for Large Autoregressive Vision Models: Xinyang Wang,

Kecheng Zheng,

Minfeng Zhu,

Wei Wu,

Fan Lu,

Wei Zhai,

Wei Chen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Xinyang and Zheng, Kecheng and Zhu, Minfeng and Wu, Wei and Lu, Fan and Zhai, Wei and Chen, Wei},
    title     = {Diffusion Guided Chain-of-Vision for Large Autoregressive Vision Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {2357-2368}
}
Spatial Retrieval Augmented Autonomous Driving: Xiaosong Jia,

Chenhe Zhang,

Yule Jiang,

Songbur Wong,

Zhiyuan Zhang,

Chen Chen,

Shaofeng Zhang,

Xuanhe Zhou,

Xue Yang,

Junchi Yan,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
    author    = {Jia, Xiaosong and Zhang, Chenhe and Jiang, Yule and Wong, Songbur and Zhang, Zhiyuan and Chen, Chen and Zhang, Shaofeng and Zhou, Xuanhe and Yang, Xue and Yan, Junchi and Jiang, Yu-Gang},
    title     = {Spatial Retrieval Augmented Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {17787-17797}
}
WeatherCity: Urban Scene Reconstruction with Controllable Multi-Weather Transformation: Wenhua Wu,

Huai Guan,

Zhe Liu,

Hesheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Wenhua and Guan, Huai and Liu, Zhe and Wang, Hesheng},
    title     = {WeatherCity: Urban Scene Reconstruction with Controllable Multi-Weather Transformation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {40949-40958}
}
Protego: User-Centric Pose-Invariant Privacy Protection Against Face Recognition-Induced Digital Footprint Exposure: Ziling Wang,

Shuya Yang,

Jialin Lu,

Ka-Ho Chow; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Ziling and Yang, Shuya and Lu, Jialin and Chow, Ka-Ho},
    title     = {Protego: User-Centric Pose-Invariant Privacy Protection Against Face Recognition-Induced Digital Footprint Exposure},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {10293-10302}
}
RMIR: A Benchmark Dataset for Reasoning-Intensive Multimodal Image Retrieval: Yijiang Li,

Kunal Kotian,

Ali Marjaninejad,

Meir Friedenberg,

Kaushik Pavani,

Sunny Dasgupta; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Yijiang and Kotian, Kunal and Marjaninejad, Ali and Friedenberg, Meir and Pavani, Kaushik and Dasgupta, Sunny},
    title     = {RMIR: A Benchmark Dataset for Reasoning-Intensive Multimodal Image Retrieval},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {2725-2734}
}
MeshWeaver: Sparse-Voxel-Guided Surface Weaving for Autoregressive Mesh Generation: Jiale Xu,

Wang Zhao,

Ying Shan; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Jiale and Zhao, Wang and Shan, Ying},
    title     = {MeshWeaver: Sparse-Voxel-Guided Surface Weaving for Autoregressive Mesh Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {5912-5922}
}
TM-BSN: Triangular-Masked Blind-Spot Network for Real-World Self-Supervised Image Denoising: Junyoung Park,

Youngjin Oh,

Nam Ik Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
    author    = {Park, Junyoung and Oh, Youngjin and Cho, Nam Ik},
    title     = {TM-BSN: Triangular-Masked Blind-Spot Network for Real-World Self-Supervised Image Denoising},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {29877-29886}
}
ReMoT: Reinforcement Learning with Motion Contrast Triplets: Cong Wan,

Zeyu Guo,

Jiangyang Li,

Songlin Dong,

Yifan Bai,

Lin Peng,

Zhiheng Ma,

Yihong Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wan_2026_CVPR,
    author    = {Wan, Cong and Guo, Zeyu and Li, Jiangyang and Dong, Songlin and Bai, Yifan and Peng, Lin and Ma, Zhiheng and Gong, Yihong},
    title     = {ReMoT: Reinforcement Learning with Motion Contrast Triplets},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {5487-5498}
}
LLMind: Bio-inspired Training-free Adaptive Visual Representations for Vision-Language Models: Soumyaratna Debnath,

Bui Duc Manh,

Zinan Liu,

Lin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Debnath_2026_CVPR,
    author    = {Debnath, Soumyaratna and Manh, Bui Duc and Liu, Zinan and Wang, Lin},
    title     = {LLMind: Bio-inspired Training-free Adaptive Visual Representations for Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {3133-3142}
}
TextOVSR: Text-Guided Real-World Opera Video Super-Resolution: Hua Chang,

Xin Xu,

Wei Liu,

Jiayi Wu,

Kui Jiang,

Fei Ma,

Qi Tian; [pdf] [arXiv]
[bibtex]
@InProceedings{Chang_2026_CVPR,
    author    = {Chang, Hua and Xu, Xin and Liu, Wei and Wu, Jiayi and Jiang, Kui and Ma, Fei and Tian, Qi},
    title     = {TextOVSR: Text-Guided Real-World Opera Video Super-Resolution},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {2156-2165}
}
Garments2Look: A Multi-Reference Dataset for High-Fidelity Outfit-Level Virtual Try-On with Clothing and Accessories: Junyao Hu,

Zhongwei Cheng,

Waikeung Wong,

Xingxing Zou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
    author    = {Hu, Junyao and Cheng, Zhongwei and Wong, Waikeung and Zou, Xingxing},
    title     = {Garments2Look: A Multi-Reference Dataset for High-Fidelity Outfit-Level Virtual Try-On with Clothing and Accessories},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {1123-1133}
}
Decompose, Mix, Adapt: A Unified Framework for Parameter-Efficient Neural Network Recombination and Compression: Nazia Tasnim,

Shrimai Prabhumoye,

Bryan A. Plummer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tasnim_2026_CVPR,
    author    = {Tasnim, Nazia and Prabhumoye, Shrimai and Plummer, Bryan A.},
    title     = {Decompose, Mix, Adapt: A Unified Framework for Parameter-Efficient Neural Network Recombination and Compression},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {19380-19392}
}
Text-Driven 3D Hand Motion Generation from Sign Language Data: Léore Bensabath,

Mathis Petrovich,

Gül Varol; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bensabath_2026_CVPR,
    author    = {Bensabath, L{\'e}ore and Petrovich, Mathis and Varol, G{\"u}l},
    title     = {Text-Driven 3D Hand Motion Generation from Sign Language Data},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {23095-23105}
}
Adaptive Capacity Autoregressive Visual Tracking: Tong Lin,

Yifan Bai,

Shiyi Liang,

Ruigang Niu,

Xing Wei; [pdf]
[bibtex]
@InProceedings{Lin_2026_CVPR,
    author    = {Lin, Tong and Bai, Yifan and Liang, Shiyi and Niu, Ruigang and Wei, Xing},
    title     = {Adaptive Capacity Autoregressive Visual Tracking},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {13574-13583}
}
Stable Mean Flow: Lyapunov-Inspired One-Step Flow Matching: Guangxun Zhang,

Mason Haberle,

Davi Geiger; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Guangxun and Haberle, Mason and Geiger, Davi},
    title     = {Stable Mean Flow: Lyapunov-Inspired One-Step Flow Matching},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {9223-9232}
}
Incentivizing Generative Zero-Shot Learning via Outcome-Reward Reinforcement Learning with Visual Cues: Wenjin Hou,

Xiaoxiao Sun,

Hehe Fan; [pdf] [arXiv]
[bibtex]
@InProceedings{Hou_2026_CVPR,
    author    = {Hou, Wenjin and Sun, Xiaoxiao and Fan, Hehe},
    title     = {Incentivizing Generative Zero-Shot Learning via Outcome-Reward Reinforcement Learning with Visual Cues},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {5499-5510}
}
Steering Where to Diffuse: Generative Modeling of Phenotypic Response Simulation with Steered Diffusion Bridge: Rongchao Zhang,

Chengxin Li,

Yiwei Lou,

Yuling Shi,

Hanpin Wang,

Yu Huang; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Rongchao and Li, Chengxin and Lou, Yiwei and Shi, Yuling and Wang, Hanpin and Huang, Yu},
    title     = {Steering Where to Diffuse: Generative Modeling of Phenotypic Response Simulation with Steered Diffusion Bridge},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {27367-27377}
}
SoPE: Spherical Coordinate-Based Positional Embedding for Enhancing Spatial Perception of 3D LVLMs: Koonting Yip,

Qiyan Zhao,

Wenhao Yu,

Liangyu Yuen,

Mingkai Li,

Xiaofeng Zhang,

Jianmin Ji,

Yanyong Zhang,

Qing Jiang,

Ka-Veng Yuen; [pdf] [supp]
[bibtex]
@InProceedings{Yip_2026_CVPR,
    author    = {Yip, Koonting and Zhao, Qiyan and Yu, Wenhao and Yuen, Liangyu and Li, Mingkai and Zhang, Xiaofeng and Ji, Jianmin and Zhang, Yanyong and Jiang, Qing and Yuen, Ka-Veng},
    title     = {SoPE: Spherical Coordinate-Based Positional Embedding for Enhancing Spatial Perception of 3D LVLMs},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {33714-33726}
}
3D-Fixer: Coarse-to-Fine In-place Completion for 3D Scenes from a Single Image: Ze-Xin Yin,

Liu Liu,

Xinjie Wang,

Wei Sui,

Zhizhong Su,

Jian Yang,

Jin Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
    author    = {Yin, Ze-Xin and Liu, Liu and Wang, Xinjie and Sui, Wei and Su, Zhizhong and Yang, Jian and Xie, Jin},
    title     = {3D-Fixer: Coarse-to-Fine In-place Completion for 3D Scenes from a Single Image},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {12753-12763}
}
EduDiag: A Benchmark for Educational Diagnostic Reasoning with Error Tracing and Correction on Large Multimodal Models: Jiali Chen,

Yuqi Xue,

Xusen Hei,

DingBa Fu,

Yuancheng Wei,

Jiayuan Xie,

Yi Cai; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Jiali and Xue, Yuqi and Hei, Xusen and Fu, DingBa and Wei, Yuancheng and Xie, Jiayuan and Cai, Yi},
    title     = {EduDiag: A Benchmark for Educational Diagnostic Reasoning with Error Tracing and Correction on Large Multimodal Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {15890-15901}
}
ColaVLA: Leveraging Cognitive Latent Reasoning for Hierarchical Parallel Trajectory Planning in Autonomous Driving: Qihang Peng,

Xuesong Chen,

Chenye Yang,

Shaoshuai Shi,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
    author    = {Peng, Qihang and Chen, Xuesong and Yang, Chenye and Shi, Shaoshuai and Li, Hongsheng},
    title     = {ColaVLA: Leveraging Cognitive Latent Reasoning for Hierarchical Parallel Trajectory Planning in Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {17809-17819}
}
Breaking the Illusion: When Positive Meets Negative in Multimodal Decoding: Yubo Jiang,

Yitong An,

Xin Yang,

Abudukelimu Wuerkaixi,

Xuxin Cheng,

Fengying Xie,

Zhiguo Jiang,

Cao Liu,

Ke Zeng,

Haopeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
    author    = {Jiang, Yubo and An, Yitong and Yang, Xin and Wuerkaixi, Abudukelimu and Cheng, Xuxin and Xie, Fengying and Jiang, Zhiguo and Liu, Cao and Zeng, Ke and Zhang, Haopeng},
    title     = {Breaking the Illusion: When Positive Meets Negative in Multimodal Decoding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {4210-4220}
}
UniPixie: Unified and Probabilistic 3D Physics Learning via Flow Matching: Qilin Huang,

Quynh Anh Huynh,

Long Le,

Chen Wang,

Chuhao Chen,

Ryan Lucas,

Eric Eaton,

Lingjie Liu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Qilin and Huynh, Quynh Anh and Le, Long and Wang, Chen and Chen, Chuhao and Lucas, Ryan and Eaton, Eric and Liu, Lingjie},
    title     = {UniPixie: Unified and Probabilistic 3D Physics Learning via Flow Matching},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {19907-19916}
}
Forecasting 3D Scanpaths in Egocentric Video: Fiona Ryan,

Ishwarya Ananthabhotla,

Yijun Qian,

Judy Hoffman,

James M. Rehg,

Vamsi Krishna Ithapu,

Calvin Murdock; [pdf] [supp]
[bibtex]
@InProceedings{Ryan_2026_CVPR,
    author    = {Ryan, Fiona and Ananthabhotla, Ishwarya and Qian, Yijun and Hoffman, Judy and Rehg, James M. and Ithapu, Vamsi Krishna and Murdock, Calvin},
    title     = {Forecasting 3D Scanpaths in Egocentric Video},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2026},
    pages     = {42824-42835}
}
SimLBR: Learning to Detect Fake Images by Learning to Detect Real Images: Aayush Dhakal,

Subash Khanal,

Srikumar Sastry,

Jacob Arndt,

Philipe Dias,

Dalton Lunga,

Nathan Jacobs; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dhakal_2026_CVPR,
  author    = {Dhakal, Aayush and Khanal, Subash and Sastry, Srikumar and Arndt, Jacob and Dias, Philipe and Lunga, Dalton and Jacobs, Nathan},
  title     = {{SimLBR}: Learning to Detect Fake Images by Learning to Detect Real Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35472--35482},
}
$L^{2}DGS$: Low-Light Dynamic Gaussian Splatting: Ashish Kumar,

Rajagopalan N Ambasamduram; [pdf] [supp]
[bibtex]
% The title starts with inline math; it is kept as real math inside a brace
% group so the formula is typeset and shielded from style-driven recasing.
@InProceedings{Kumar_2026_CVPR,
  author    = {Kumar, Ashish and Ambasamduram, Rajagopalan N},
  title     = {{$L^{2}DGS$}: Low-Light Dynamic {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19096--19106},
}
Boosting Visual Reprogramming for CLIP with Dual Granularity Alignment: Jiayang Wu,

Xinyang Chen,

Ke Lv,

Weili Guan; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Jiayang and Chen, Xinyang and Lv, Ke and Guan, Weili},
  title     = {Boosting Visual Reprogramming for {CLIP} with Dual Granularity Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29347--29356},
}
CrowdGaussian: Reconstructing High-Fidelity 3D Gaussians for Human Crowd from a Single Image: Yizheng Song,

Yiyu Zhuang,

Qipeng Xu,

Haixiang Wang,

Jiahe Zhu,

Jing Tian,

Siyu Zhu,

Hao Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Yizheng and Zhuang, Yiyu and Xu, Qipeng and Wang, Haixiang and Zhu, Jiahe and Tian, Jing and Zhu, Siyu and Zhu, Hao},
  title     = {{CrowdGaussian}: Reconstructing High-Fidelity {3D} {Gaussians} for Human Crowd from a Single Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11006--11016},
}
Granulon: Awakening Pixel-Level Visual Encoders with Adaptive Multi-Granularity Semantics for MLLM: Junyuan Mao,

Qiankun Li,

Linghao Meng,

Zhicheng He,

Xinliang Zhou,

Kun Wang,

Yang Liu,

Yueming Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Junyuan and Li, Qiankun and Meng, Linghao and He, Zhicheng and Zhou, Xinliang and Wang, Kun and Liu, Yang and Jin, Yueming},
  title     = {Granulon: Awakening Pixel-Level Visual Encoders with Adaptive Multi-Granularity Semantics for {MLLM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26317--26327},
}
AtomicVLA: Unlocking the Potential of Atomic Skill Learning in Robots: Likui Zhang,

Tao Tang,

Zhihao Zhan,

Xiuwei Chen,

Zisheng Chen,

Jianhua Han,

Jiangtong Zhu,

Pei Xu,

Hang Xu,

Hefeng Wu,

Liang Lin,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Likui and Tang, Tao and Zhan, Zhihao and Chen, Xiuwei and Chen, Zisheng and Han, Jianhua and Zhu, Jiangtong and Xu, Pei and Xu, Hang and Wu, Hefeng and Lin, Liang and Liang, Xiaodan},
  title     = {{AtomicVLA}: Unlocking the Potential of Atomic Skill Learning in Robots},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20743--20754},
}
Phased DMD: Few-step Distribution Matching Distillation via Score Matching within Subintervals: Xiangyu Fan,

Zesong Qiu,

Zhuguanyu Wu,

Fanzhou Wang,

Zhiqian Lin,

Tianxiang Ren,

Dahua Lin,

Ruihao Gong,

Lei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Xiangyu and Qiu, Zesong and Wu, Zhuguanyu and Wang, Fanzhou and Lin, Zhiqian and Ren, Tianxiang and Lin, Dahua and Gong, Ruihao and Yang, Lei},
  title     = {{Phased DMD}: Few-step Distribution Matching Distillation via Score Matching within Subintervals},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41667--41676},
}
EffectErase: Joint Video Object Removal and Insertion for High-Quality Effect Erasing: Yang Fu,

Yike Zheng,

Ziyun Dai,

Henghui Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Yang and Zheng, Yike and Dai, Ziyun and Ding, Henghui},
  title     = {{EffectErase}: Joint Video Object Removal and Insertion for High-Quality Effect Erasing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2005--2014},
}
SOUPLE: Enhancing Audio-Visual Localization and Segmentation with Learnable Prompt Contexts: Khanh Binh Nguyen,

Chae Jung Park; [pdf] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Khanh Binh and Park, Chae Jung},
  title     = {{SOUPLE}: Enhancing Audio-Visual Localization and Segmentation with Learnable Prompt Contexts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8674--8683},
}
ReDirector: Creating Any-Length Video Retakes with Rotary Camera Encoding: Byeongjun Park,

Byung-Hoon Kim,

Hyungjin Chung,

Jong Chul Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Byeongjun and Kim, Byung-Hoon and Chung, Hyungjin and Ye, Jong Chul},
  title     = {{ReDirector}: Creating Any-Length Video Retakes with Rotary Camera Encoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11163--11173},
}
Heuristic-inspired Reasoning Priors Facilitate Data-Efficient Referring Object Detection: Xu Zhang,

Zhe Chen,

Jing Zhang,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xu and Chen, Zhe and Zhang, Jing and Tao, Dacheng},
  title     = {Heuristic-inspired Reasoning Priors Facilitate Data-Efficient Referring Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10082--10092},
}
GaussianFluent: Gaussian Simulation for Dynamic Scenes with Mixed Materials: Bei Huang,

Yixin Chen,

Ruijie Lu,

Gang Zeng,

Hongbin Zha,

Yuru Pei,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Bei and Chen, Yixin and Lu, Ruijie and Zeng, Gang and Zha, Hongbin and Pei, Yuru and Huang, Siyuan},
  title     = {{GaussianFluent}: {Gaussian} Simulation for Dynamic Scenes with Mixed Materials},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21583--21593},
}
SoC: Semantic Orthogonal Calibration for Test-Time Prompt Tuning: Leo Fillioux,

Omprakash Chakraborty,

Ismail Ben Ayed,

Paul-Henry Cournède,

Stergios Christodoulidis,

Maria Vakalopoulou,

Jose Dolz; [pdf] [supp] [arXiv]
[bibtex]
% The grave accent in the fourth author's surname is written as a braced
% special character so BibTeX treats the accented letter as a single letter.
@InProceedings{Fillioux_2026_CVPR,
  author    = {Fillioux, Leo and Chakraborty, Omprakash and Ben Ayed, Ismail and Courn{\`e}de, Paul-Henry and Christodoulidis, Stergios and Vakalopoulou, Maria and Dolz, Jose},
  title     = {{SoC}: Semantic Orthogonal Calibration for Test-Time Prompt Tuning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4772--4782},
}
Learning a Unified Latent Action Space from Videos with Action-centric Cycle Consistency: Guangyan Chen,

Qi Shao,

Te Cui,

Zichen Zhou,

Weixin Mao,

Luojie Yang,

Meiling Wang,

Yi Yang,

Hua Chen,

Yufeng Yue; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Guangyan and Shao, Qi and Cui, Te and Zhou, Zichen and Mao, Weixin and Yang, Luojie and Wang, Meiling and Yang, Yi and Chen, Hua and Yue, Yufeng},
  title     = {Learning a Unified Latent Action Space from Videos with Action-centric Cycle Consistency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12871--12880},
}
FaceCam: Portrait Video Camera Control via Scale-Aware Conditioning: Weijie Lyu,

Ming-Hsuan Yang,

Zhixin Shu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lyu_2026_CVPR,
  author    = {Lyu, Weijie and Yang, Ming-Hsuan and Shu, Zhixin},
  title     = {{FaceCam}: Portrait Video Camera Control via Scale-Aware Conditioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30607--30617},
}
Language-Free Generative Editing from One Visual Example: Omar Elezabi,

Eduard Zamfir,

Zongwei Wu,

Radu Timofte; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Elezabi_2026_CVPR,
  author    = {Elezabi, Omar and Zamfir, Eduard and Wu, Zongwei and Timofte, Radu},
  title     = {Language-Free Generative Editing from One Visual Example},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1078--1088},
}
ORION: ORthonormal Text Encoding for Universal VLM AdaptatION: Omprakash Chakraborty,

Jose Dolz,

Ismail Ben Ayed; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chakraborty_2026_CVPR,
  author    = {Chakraborty, Omprakash and Dolz, Jose and Ben Ayed, Ismail},
  title     = {{ORION}: {ORthonormal} Text Encoding for Universal {VLM} {AdaptatION}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31556--31565},
}
Mobile-VTON: High-Fidelity On-Device Virtual Try-On: Zhenchen Wan,

Ce Chen,

Runqi Lin,

Jiaxin Huang,

Tianxi Chen,

Yanwu Xu,

Tongliang Liu,

Mingming Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wan_2026_CVPR,
  author    = {Wan, Zhenchen and Chen, Ce and Lin, Runqi and Huang, Jiaxin and Chen, Tianxi and Xu, Yanwu and Liu, Tongliang and Gong, Mingming},
  title     = {{Mobile-VTON}: High-Fidelity On-Device Virtual Try-On},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38081--38090},
}
EmoTaG: Emotion-Aware Talking Head Synthesis on Gaussian Splatting with Few-Shot Personalization: Haolan Xu,

Keli Cheng,

Lei Wang,

Ning Bi,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Haolan and Cheng, Keli and Wang, Lei and Bi, Ning and Liu, Xiaoming},
  title     = {{EmoTaG}: Emotion-Aware Talking Head Synthesis on {Gaussian} Splatting with Few-Shot Personalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10921--10931},
}
RINO: Rotation-Invariant Non-Rigid Correspondences: Maolin Gao,

Shao Jie Hu-Chen,

Congyue Deng,

Riccardo Marin,

Leonidas Guibas,

Daniel Cremers; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Maolin and Hu-Chen, Shao Jie and Deng, Congyue and Marin, Riccardo and Guibas, Leonidas and Cremers, Daniel},
  title     = {{RINO}: Rotation-Invariant Non-Rigid Correspondences},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34683--34693},
}
Native and Compact Structured Latents for 3D Generation: Jianfeng Xiang,

Xiaoxue Chen,

Sicheng Xu,

Ruicheng Wang,

Zelong Lv,

Yu Deng,

Hongyuan Zhu,

Yue Dong,

Hao Zhao,

Nicholas Jing Yuan,

Jiaolong Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Jianfeng and Chen, Xiaoxue and Xu, Sicheng and Wang, Ruicheng and Lv, Zelong and Deng, Yu and Zhu, Hongyuan and Dong, Yue and Zhao, Hao and Yuan, Nicholas Jing and Yang, Jiaolong},
  title     = {Native and Compact Structured Latents for {3D} Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14419--14429},
}
IP-Adapter Is All You Need: Towards Fine-Tuning-Free Diffusion-Based Talking Face Generation: Hao Wu,

Xiangyang Luo,

Hao Wang,

Jiawei Zhang,

Yi Zhang,

Jinwei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Hao and Luo, Xiangyang and Wang, Hao and Zhang, Jiawei and Zhang, Yi and Wang, Jinwei},
  title     = {{IP-Adapter} Is All You Need: Towards Fine-Tuning-Free Diffusion-Based Talking Face Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32967--32977},
}
DualReg: Dual-Space Filtering and Reinforcement for Rigid Registration: Jiayi Li,

Yuxin Yao,

Qiuhang Lu,

Juyong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiayi and Yao, Yuxin and Lu, Qiuhang and Zhang, Juyong},
  title     = {{DualReg}: Dual-Space Filtering and Reinforcement for Rigid Registration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39031--39041},
}
UFO: Unifying Feed-Forward and Optimization-based Methods for Large Driving Scene Modeling: Kaiyuan Tan,

Yingying Shen,

Ziyue Zhu,

Mingfei Tu,

Haohui Zhu,

Haiyang Sun,

Bing Wang,

Guang Chen,

Hangjun Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Kaiyuan and Shen, Yingying and Zhu, Ziyue and Tu, Mingfei and Zhu, Haohui and Sun, Haiyang and Wang, Bing and Chen, Guang and Ye, Hangjun},
  title     = {{UFO}: Unifying Feed-Forward and Optimization-based Methods for Large Driving Scene Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21849--21859},
}
AdaSFormer: Adaptive Serialized Transformers for Monocular Semantic Scene Completion from Indoor Environments: Xuzhi Wang,

Xinran Wu,

Song Wang,

Lingdong Kong,

Ziping Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xuzhi and Wu, Xinran and Wang, Song and Kong, Lingdong and Zhao, Ziping},
  title     = {{AdaSFormer}: Adaptive Serialized Transformers for Monocular Semantic Scene Completion from Indoor Environments},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34148--34159},
}
Plant Taxonomy Meets Plant Counting: A Fine-Grained, Taxonomic Dataset for Counting Hundreds of Plant Species: Jinyu Xu,

Tianqi Hu,

Xiaonan Hu,

Letian Zhou,

Songliang Cao,

Meng Zhang,

Hao Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jinyu and Hu, Tianqi and Hu, Xiaonan and Zhou, Letian and Cao, Songliang and Zhang, Meng and Lu, Hao},
  title     = {Plant Taxonomy Meets Plant Counting: A Fine-Grained, Taxonomic Dataset for Counting Hundreds of Plant Species},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {167--177},
}
Heterogeneous Decentralized Diffusion Models: Zhiying Jiang,

Raihan Seraj,

Marcos Villagra,

Bidhan Roy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Zhiying and Seraj, Raihan and Villagra, Marcos and Roy, Bidhan},
  title     = {Heterogeneous Decentralized Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2391--2400},
}
XSeg: A Large-scale X-ray Contraband Segmentation Benchmark For Real-World Security Screening: Hongxia Gao,

Yixin Chen,

Jiali Wen,

Litao Li,

Qianyun Liu,

Kaijie Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Hongxia and Chen, Yixin and Wen, Jiali and Li, Litao and Liu, Qianyun and Zhang, Kaijie},
  title     = {{XSeg}: A Large-scale {X-ray} Contraband Segmentation Benchmark For Real-World Security Screening},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24950--24959},
}
Zoo3D: Zero-Shot 3D Object Detection at Scene Level: Andrey Lemeshko,

Bulat Gabdullin,

Nikita Drozdov,

Anton Konushin,

Danila Rukhovich,

Maksim Kolodiazhnyi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lemeshko_2026_CVPR,
  author    = {Lemeshko, Andrey and Gabdullin, Bulat and Drozdov, Nikita and Konushin, Anton and Rukhovich, Danila and Kolodiazhnyi, Maksim},
  title     = {{Zoo3D}: Zero-Shot {3D} Object Detection at Scene Level},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25820--25829},
}
TimeViper: A Hybrid Mamba-Transformer Vision-Language Model for Efficient Long Video Understanding: Boshen Xu,

Zihan Xiao,

Jiaze Li,

Jianzhong Ju,

Zhenbo Luo,

Jian Luan,

Qin Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Boshen and Xiao, Zihan and Li, Jiaze and Ju, Jianzhong and Luo, Zhenbo and Luan, Jian and Jin, Qin},
  title     = {{TimeViper}: A Hybrid {Mamba}-Transformer Vision-Language Model for Efficient Long Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40479--40493},
}
FilterGS: Traversal-Free Parallel Filtering and Adaptive Shrinking for Large-Scale LoD 3D Gaussian Splatting: Yixian Wang,

Haolin Yu,

Jiadong Tang,

Yu Gao,

Xihan Wang,

Yufeng Yue,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yixian and Yu, Haolin and Tang, Jiadong and Gao, Yu and Wang, Xihan and Yue, Yufeng and Yang, Yi},
  title     = {{FilterGS}: Traversal-Free Parallel Filtering and Adaptive Shrinking for Large-Scale {LoD} {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26052--26061},
}
Goal-Driven Reward by Video Diffusion Models for Reinforcement Learning: Qi Wang,

Mian Wu,

Yuyang Zhang,

Mingqi Yuan,

Wenyao Zhang,

Haoxiang You,

Yunbo Wang,

Xin Jin,

Xiaokang Yang,

Wenjun Zeng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Qi and Wu, Mian and Zhang, Yuyang and Yuan, Mingqi and Zhang, Wenyao and You, Haoxiang and Wang, Yunbo and Jin, Xin and Yang, Xiaokang and Zeng, Wenjun},
  title     = {Goal-Driven Reward by Video Diffusion Models for Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8077--8086},
}
Unsupervised Multi-Scale Segmentation of 3D Subcellular World with Stable Diffusion Foundation Model: Mostofa Rafid Uddin,

HM Shadman Tabib,

Thanh-Huy Nguyen,

Kashish Gandhi,

Min Xu; [pdf] [supp]
[bibtex]
@InProceedings{Uddin_2026_CVPR,
  author    = {Uddin, Mostofa Rafid and Tabib, HM Shadman and Nguyen, Thanh-Huy and Gandhi, Kashish and Xu, Min},
  title     = {Unsupervised Multi-Scale Segmentation of {3D} Subcellular World with {Stable Diffusion} Foundation Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22744--22752},
}
Enhancing Hands in 3D Whole-Body Pose Estimation with Conditional Hands Modulator: Gyeongsik Moon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moon_2026_CVPR,
  author    = {Moon, Gyeongsik},
  title     = {Enhancing Hands in {3D} Whole-Body Pose Estimation with Conditional Hands Modulator},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8891--8900},
}
LiDAR-to-4DRadar Diffusion Bridge via Cross-Modal Alignment and Translation in Latent Space: Dazhong Shen,

Jingjing Gu,

Qiang Zhou,

Meng Zhao,

Ying Sun; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Dazhong and Gu, Jingjing and Zhou, Qiang and Zhao, Meng and Sun, Ying},
  title     = {{LiDAR-to-4DRadar} Diffusion Bridge via Cross-Modal Alignment and Translation in Latent Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17110--17120},
}
GeniNav: Generative Model Driven Image-Goal Navigation via Imagination-Guided Consistency Flow Matching: Yuqi Chen,

Junjie Gao,

Yongzhou Pan,

Siyuan Song,

Zixuan Zhang,

Jiaping Xiao,

Mir Feroskhan; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yuqi and Gao, Junjie and Pan, Yongzhou and Song, Siyuan and Zhang, Zixuan and Xiao, Jiaping and Feroskhan, Mir},
  title     = {{GeniNav}: Generative Model Driven Image-Goal Navigation via Imagination-Guided Consistency Flow Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {996--1005},
}
CineSRD: Leveraging Visual, Acoustic, and Linguistic Cues for Open-World Visual Media Speaker Diarization: Liangbin Huang,

Xiaohua Liao,

Chaoqun Cui,

Shijing Wang,

Zhaolong Huang,

Yanlong Du,

Wenji Mao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Liangbin and Liao, Xiaohua and Cui, Chaoqun and Wang, Shijing and Huang, Zhaolong and Du, Yanlong and Mao, Wenji},
  title     = {{CineSRD}: Leveraging Visual, Acoustic, and Linguistic Cues for Open-World Visual Media Speaker Diarization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8836--8845},
}
Learning Personalized Photographic Style from Pairwise User Preferences: Jinwoo Kim,

Jihye Yoo,

Seon Joo Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jinwoo and Yoo, Jihye and Kim, Seon Joo},
  title     = {Learning Personalized Photographic Style from Pairwise User Preferences},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1134--1144},
}
Reparameterized Tensor Ring Functional Decomposition for Multi-Dimensional Data Recovery: Yangyang Xu,

Junbo Ke,

You-Wei Wen,

Chao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yangyang and Ke, Junbo and Wen, You-Wei and Wang, Chao},
  title     = {Reparameterized Tensor Ring Functional Decomposition for Multi-Dimensional Data Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26389--26398},
}
Semi-Supervised Conformal Prediction With Unlabeled Nonconformity Score: Xuanning Zhou,

Zihao Shi,

Hao Zeng,

Xiaobo Xia,

Bingyi Jing,

Hongxin Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Xuanning and Shi, Zihao and Zeng, Hao and Xia, Xiaobo and Jing, Bingyi and Wei, Hongxin},
  title     = {Semi-Supervised Conformal Prediction With Unlabeled Nonconformity Score},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17704--17713},
}
EfficientVPR: Toward Efficient Visual Place Recognition via Scene-Aware Prompt Tuning and Adaptive Feature Enhancement: Wenjing Tang,

Chuanguang Yang,

Zhulin An,

Libo Huang,

Boyu Diao,

Yongjun Xu; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Wenjing and Yang, Chuanguang and An, Zhulin and Huang, Libo and Diao, Boyu and Xu, Yongjun},
  title     = {{EfficientVPR}: Toward Efficient Visual Place Recognition via Scene-Aware Prompt Tuning and Adaptive Feature Enhancement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33738--33748},
}
DA-Mamba: Learning Domain-Aware State Space Model for Global-Local Alignment in Domain Adaptive Object Detection: Haochen Li,

Rui Zhang,

Hantao Yao,

Xin Zhang,

Yifan Hao,

Shaohui Peng,

Yongwei Zhao,

Ling Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Haochen and Zhang, Rui and Yao, Hantao and Zhang, Xin and Hao, Yifan and Peng, Shaohui and Zhao, Yongwei and Li, Ling},
  title     = {{DA-Mamba}: Learning Domain-Aware State Space Model for Global-Local Alignment in Domain Adaptive Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8000--8010},
}
MU-GeNeRF: Multi-view Uncertainty-guided Generalizable Neural Radiance Fields for Distractor-aware Scene: Wenjie Mu,

Zhan Li,

Chuanzhou Su,

Xuanyi Shen,

Ziniu Liu,

Fan Lu,

Yujian Mo,

Junqiao Zhao,

Tiantian Feng,

Chen Ye,

Guang Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mu_2026_CVPR,
  author    = {Mu, Wenjie and Li, Zhan and Su, Chuanzhou and Shen, Xuanyi and Liu, Ziniu and Lu, Fan and Mo, Yujian and Zhao, Junqiao and Feng, Tiantian and Ye, Chen and Chen, Guang},
  title     = {{MU-GeNeRF}: Multi-view Uncertainty-guided Generalizable Neural Radiance Fields for Distractor-aware Scene},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38049--38059},
}
Material Magic Wand: Material-Aware Grouping of 3D Parts in Untextured Meshes: Umangi Jain,

Vladimir Kim,

Matheus Gadelha,

Igor Gilitschenski,

Zhiqin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jain_2026_CVPR,
  author    = {Jain, Umangi and Kim, Vladimir and Gadelha, Matheus and Gilitschenski, Igor and Chen, Zhiqin},
  title     = {{Material Magic Wand}: Material-Aware Grouping of {3D} Parts in Untextured Meshes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6377--6387},
}
VQRAE: Representation Quantization Autoencoders for Multimodal Understanding, Generation and Reconstruction: Sinan Du,

Jiahao Guo,

Bo Li,

Shuhao Cui,

Zhengzhuo Xu,

Yifu Luo,

Yongxian Wei,

Kun Gai,

Xinggang Wang,

Kai Wu,

Chun Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Sinan and Guo, Jiahao and Li, Bo and Cui, Shuhao and Xu, Zhengzhuo and Luo, Yifu and Wei, Yongxian and Gai, Kun and Wang, Xinggang and Wu, Kai and Yuan, Chun},
  title     = {{VQRAE}: Representation Quantization Autoencoders for Multimodal Understanding, Generation and Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30322--30334},
}
Sparse-LaViDa: Sparse Multimodal Discrete Diffusion Language Models: Shufan Li,

Jiuxiang Gu,

Kangning Liu,

Zhe Lin,

Zijun Wei,

Aditya Grover,

Jason Kuen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Shufan and Gu, Jiuxiang and Liu, Kangning and Lin, Zhe and Wei, Zijun and Grover, Aditya and Kuen, Jason},
  title     = {{Sparse-LaViDa}: Sparse Multimodal Discrete Diffusion Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36103--36114},
}
Test-Time Training for LiDAR Semantic Segmentation under Corruption via Geometric Inlier Discrimination: Hyeonseong Kim,

Hyun-Kurl Jang,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Hyeonseong and Jang, Hyun-Kurl and Yoon, Kuk-Jin},
  title     = {Test-Time Training for {LiDAR} Semantic Segmentation under Corruption via Geometric Inlier Discrimination},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24206--24216},
}
LEADER: Learning Reliable Local-to-Global Correspondences for LiDAR Relocalization: Jianshi Wu,

Minghang Zhu,

Dunqiang Liu,

Wen Li,

Sheng Ao,

Siqi Shen,

Chenglu Wen,

Cheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Jianshi and Zhu, Minghang and Liu, Dunqiang and Li, Wen and Ao, Sheng and Shen, Siqi and Wen, Chenglu and Wang, Cheng},
  title     = {{LEADER}: Learning Reliable Local-to-Global Correspondences for {LiDAR} Relocalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9932--9942},
}
Thinking Beyond Labels: Vocabulary-Free Fine-Grained Recognition using Reasoning-Augmented LMMs: Dmitry Demidov,

Muhammad Zaigham Zaheer,

Zongyan Han,

Omkar Thawakar,

Rao Anwer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Demidov_2026_CVPR,
  author    = {Demidov, Dmitry and Zaheer, Muhammad Zaigham and Han, Zongyan and Thawakar, Omkar and Anwer, Rao},
  title     = {Thinking Beyond Labels: Vocabulary-Free Fine-Grained Recognition using Reasoning-Augmented {LMMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16855--16864},
}
SinGeo: Unlock Single Model's Potential for Robust Cross-View Geo-Localization: Yang Chen,

Xieyuanli Chen,

Junxiang Li,

Jie Tang,

Tao Wu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yang and Chen, Xieyuanli and Li, Junxiang and Tang, Jie and Wu, Tao},
  title     = {{SinGeo}: Unlock Single Model's Potential for Robust Cross-View Geo-Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19403--19412},
}
JarvisEvo: Towards a Self-Evolving Photo Editing Agent with Synergistic Editor-Evaluator Optimization: Yunlong Lin,

Linqing Wang,

Kunjie Lin,

Zixu Lin,

Kaixiong Gong,

Wenbo Li,

Bin Lin,

Zhenxi Li,

Shiyi Zhang,

Yuyang Peng,

Wenxun Dai,

Xinghao Ding,

Chunyu Wang,

Qinglin Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Yunlong and Wang, Linqing and Lin, Kunjie and Lin, Zixu and Gong, Kaixiong and Li, Wenbo and Lin, Bin and Li, Zhenxi and Zhang, Shiyi and Peng, Yuyang and Dai, Wenxun and Ding, Xinghao and Wang, Chunyu and Lu, Qinglin},
  title     = {{JarvisEvo}: Towards a Self-Evolving Photo Editing Agent with Synergistic Editor-Evaluator Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27291--27302},
}
SyncDreamer: Controllable and Expressive Avatar Generation Beyond the Talking Head: Fatemeh Nazarieh,

Zhenhua Feng,

Diptesh Kanojia,

Josef Kittler,

Muhammad Awais; [pdf] [supp]
[bibtex]
@InProceedings{Nazarieh_2026_CVPR,
  author    = {Nazarieh, Fatemeh and Feng, Zhenhua and Kanojia, Diptesh and Kittler, Josef and Awais, Muhammad},
  title     = {{SyncDreamer}: Controllable and Expressive Avatar Generation Beyond the Talking Head},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25121--25130},
}
FusionRegister: Every Infrared and Visible Image Fusion Deserves Registration: Congcong Bian,

Haolong Ma,

Hui Li,

Zhongwei Shen,

Xiaoqing Luo,

Xiaoning Song,

Xiao-jun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bian_2026_CVPR,
  author    = {Bian, Congcong and Ma, Haolong and Li, Hui and Shen, Zhongwei and Luo, Xiaoqing and Song, Xiaoning and Wu, Xiao-jun},
  title     = {{FusionRegister}: Every Infrared and Visible Image Fusion Deserves Registration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41551--41561},
}
SwiftVLA: Unlocking Spatiotemporal Dynamics for Lightweight VLA Models at Minimal Overhead: Chaojun Ni,

Cheng Chen,

Xiaofeng Wang,

Zheng Zhu,

Wenzhao Zheng,

Boyuan Wang,

Tianrun Chen,

Guosheng Zhao,

Haoyun Li,

Zhehao Dong,

Qiang Zhang,

Yun Ye,

Yang Wang,

Guan Huang,

Wenjun Mei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2026_CVPR,
  author    = {Ni, Chaojun and Chen, Cheng and Wang, Xiaofeng and Zhu, Zheng and Zheng, Wenzhao and Wang, Boyuan and Chen, Tianrun and Zhao, Guosheng and Li, Haoyun and Dong, Zhehao and Zhang, Qiang and Ye, Yun and Wang, Yang and Huang, Guan and Mei, Wenjun},
  title     = {{SwiftVLA}: Unlocking Spatiotemporal Dynamics for Lightweight {VLA} Models at Minimal Overhead},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13474--13485},
}
SpatiaLQA: A Benchmark for Evaluating Spatial Logical Reasoning in Vision-Language Models: Yuechen Xie,

Xiaoyan Zhang,

Yicheng Shan,

Zhu Hao,

Rui Tang,

Rong Wei,

Mingli Song,

Yuanyu Wan,

Jie Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yuechen and Zhang, Xiaoyan and Shan, Yicheng and Hao, Zhu and Tang, Rui and Wei, Rong and Song, Mingli and Wan, Yuanyu and Song, Jie},
  title     = {{SpatiaLQA}: A Benchmark for Evaluating Spatial Logical Reasoning in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2645--2657},
}
Robustness Under Data Scarcity: Few-Shot Continual Adversarial Training for Evolving Threats: Wenxuan Wang,

Chenglei Wang,

Chengzhi Yan,

Xuelin Qian,

Yanning Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Wenxuan and Wang, Chenglei and Yan, Chengzhi and Qian, Xuelin and Zhang, Yanning},
  title     = {Robustness Under Data Scarcity: Few-Shot Continual Adversarial Training for Evolving Threats},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34908--34917},
}
H^2A^2: Homogeneity-Aware and Heterogeneity-Aware Feature Perception for Unified Indoor 3D Object Detection: Tao Xie,

Tao An,

Feng Liu,

Wensheng Jin,

Zhengyu Li,

Lijun Zhao,

Ruifeng Li; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Tao and An, Tao and Liu, Feng and Jin, Wensheng and Li, Zhengyu and Zhao, Lijun and Li, Ruifeng},
  title     = {{H$^2$A$^2$}: Homogeneity-Aware and Heterogeneity-Aware Feature Perception for Unified Indoor {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40717--40726},
}
Omni-MMSI: Toward Identity-attributed Social Interaction Understanding: Xinpeng Li,

Bolin Lai,

Hardy Chen,

Shijian Deng,

Cihang Xie,

Yuyin Zhou,

James M. Rehg,

Yapeng Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xinpeng and Lai, Bolin and Chen, Hardy and Deng, Shijian and Xie, Cihang and Zhou, Yuyin and Rehg, James M. and Tian, Yapeng},
  title     = {{Omni-MMSI}: Toward Identity-attributed Social Interaction Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8684--8696},
}
Mirai: Autoregressive Visual Generation Needs Foresight: Yonghao Yu,

Lang Huang,

Zerun Wang,

Runyi Li,

Toshihiko Yamasaki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Yonghao and Huang, Lang and Wang, Zerun and Li, Runyi and Yamasaki, Toshihiko},
  title     = {{Mirai}: Autoregressive Visual Generation Needs Foresight},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30511--30520},
}
SIMPACT: Simulation-Enabled Action Planning using Vision-Language Models: Haowen Liu,

Shaoxiong Yao,

Haonan Chen,

Jiawei Gao,

Jiayuan Mao,

Jia-Bin Huang,

Yilun Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Haowen and Yao, Shaoxiong and Chen, Haonan and Gao, Jiawei and Mao, Jiayuan and Huang, Jia-Bin and Du, Yilun},
  title     = {{SIMPACT}: Simulation-Enabled Action Planning using Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20790--20801},
}
EMMA: Concept Erasure Benchmark with Comprehensive Semantic Metrics and Diverse Categories: Lu Wei,

Yuta Nakashima,

Noa Garcia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Lu and Nakashima, Yuta and Garcia, Noa},
  title     = {{EMMA}: Concept Erasure Benchmark with Comprehensive Semantic Metrics and Diverse Categories},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37247--37257},
}
Unsupervised 3D Motion Estimation Using Event Camera: Han Han,

Wei Zhai,

Tiesong Zhao,

Bin Li,

Yang Cao,

Zheng-jun Zha; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Han and Zhai, Wei and Zhao, Tiesong and Li, Bin and Cao, Yang and Zha, Zheng-jun},
  title     = {Unsupervised {3D} Motion Estimation Using Event Camera},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8067--8076},
}
What's Wrong with Synthetic Data for Scene Text Recognition? A Strong Synthetic Engine with Diverse Simulations and Self-Evolution: Xingsong Ye,

Yongkun Du,

JiaXin Zhang,

Chen Li,

Jing LYU,

Zhineng Chen; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Xingsong and Du, Yongkun and Zhang, JiaXin and Li, Chen and LYU, Jing and Chen, Zhineng},
  title     = {What's Wrong with Synthetic Data for Scene Text Recognition? {A} Strong Synthetic Engine with Diverse Simulations and Self-Evolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16645--16654},
}
Fed-ADE: Adaptive Learning Rate for Federated Post-adaptation under Distribution Shift: Heewon Park,

Mugon Joe,

Miru Kim,

Kyungjin Im,

Minhae Kwon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Heewon and Joe, Mugon and Kim, Miru and Im, Kyungjin and Kwon, Minhae},
  title     = {{Fed-ADE}: Adaptive Learning Rate for Federated Post-adaptation under Distribution Shift},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24587--24597},
}
Beyond Duality: A Hybrid Framework of Leveraging Shared and Private Features for RGB-Event Object Detection: Keyao Wang,

Shuai Liu,

Hengda Shi,

Lukui Shi,

Haiyong Chen; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Keyao and Liu, Shuai and Shi, Hengda and Shi, Lukui and Chen, Haiyong},
  title     = {Beyond Duality: A Hybrid Framework of Leveraging Shared and Private Features for {RGB}-Event Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4415--4424},
}
Solving a Nonlinear Blind Inverse Problem for Tagged MRI with Physics and Deep Generative Priors: Zhangxing Bian,

Shuwen Wei,

Samuel W. Remedios,

Junyu Chen,

Aaron Carass,

Blake Dewey,

Jerry L Prince; [pdf] [arXiv]
[bibtex]
@InProceedings{Bian_2026_CVPR,
  author    = {Bian, Zhangxing and Wei, Shuwen and Remedios, Samuel W. and Chen, Junyu and Carass, Aaron and Dewey, Blake and Prince, Jerry L},
  title     = {Solving a Nonlinear Blind Inverse Problem for Tagged {MRI} with Physics and Deep Generative Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41730--41740},
}
HierUQ: Hierarchical Uncertainty Quantification with Adaptive Granularity Reconciliation for Degraded Image Classification: Yang Chu,

Xiaomeng Yang,

Keli Deng,

Yuntao Qian; [pdf] [supp]
[bibtex]
@InProceedings{Chu_2026_CVPR,
  author    = {Chu, Yang and Yang, Xiaomeng and Deng, Keli and Qian, Yuntao},
  title     = {{HierUQ}: Hierarchical Uncertainty Quantification with Adaptive Granularity Reconciliation for Degraded Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11240--11249},
}
Spatiotemporal Pyramid Flow Matching for Climate Emulation: Jeremy A. Irvin,

Jiaqi Han,

Zikui Wang,

Abdulaziz Alharbi,

Yufei Zhao,

Nomin-Erdene Bayarsaikhan,

Daniele Visioni,

Andrew Y. Ng,

Duncan Watson-Parris; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Irvin_2026_CVPR,
  author    = {Irvin, Jeremy A. and Han, Jiaqi and Wang, Zikui and Alharbi, Abdulaziz and Zhao, Yufei and Bayarsaikhan, Nomin-Erdene and Visioni, Daniele and Ng, Andrew Y. and Watson-Parris, Duncan},
  title     = {Spatiotemporal Pyramid Flow Matching for Climate Emulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42180--42190},
}
PhaSR: Generalized Image Shadow Removal with Physically Aligned Priors: Chia-Ming Lee,

Yu-Fan Lin,

Yu-Jou Hsiao,

Jin-Hui Jiang,

Yu-Lun Liu,

Chih-Chung Hsu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Chia-Ming and Lin, Yu-Fan and Hsiao, Yu-Jou and Jiang, Jin-Hui and Liu, Yu-Lun and Hsu, Chih-Chung},
  title     = {{PhaSR}: Generalized Image Shadow Removal with Physically Aligned Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22679--22688},
}
Stable Spike: Dual Consistency Optimization via Bitwise AND Operations for Spiking Neural Networks: Yongqi Ding,

Kunshan Yang,

Linze Li,

Yiyang Zhang,

Mengmeng Jing,

Lin Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Yongqi and Yang, Kunshan and Li, Linze and Zhang, Yiyang and Jing, Mengmeng and Zuo, Lin},
  title     = {Stable Spike: Dual Consistency Optimization via Bitwise {AND} Operations for Spiking Neural Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {892--902},
}
H-Sets: Hessian-Guided Discovery of Set-Level Feature Interactions in Image Classifiers: Ayushi Mehrotra,

Dipkamal Bhusal,

Michael Clifford,

Nidhi Rastogi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mehrotra_2026_CVPR,
  author    = {Mehrotra, Ayushi and Bhusal, Dipkamal and Clifford, Michael and Rastogi, Nidhi},
  title     = {{H-Sets}: {Hessian}-Guided Discovery of Set-Level Feature Interactions in Image Classifiers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17067--17076},
}
Factorized Context Aggregation for Robust Cancer Risk Estimation via Soft Re-Ranked Retrieval and Hierarchical Anchors: Puria Azadi Moghadam,

Ali Khajegili Mirabadi,

Behnam Maneshgar,

Hossein Farahani,

Ali Bashashati; [pdf] [supp]
[bibtex]
@InProceedings{Moghadam_2026_CVPR,
  author    = {Moghadam, Puria Azadi and Mirabadi, Ali Khajegili and Maneshgar, Behnam and Farahani, Hossein and Bashashati, Ali},
  title     = {Factorized Context Aggregation for Robust Cancer Risk Estimation via Soft Re-Ranked Retrieval and Hierarchical Anchors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34095--34105},
}
Relightable Holoported Characters: Capturing and Relighting Dynamic Human Performance from Sparse Views: Kunwar Maheep Singh,

Jianchun Chen,

Vladislav Golyanik,

Stephan J. Garbin,

Thabo Beeler,

Rishabh Dabral,

Marc Habermann,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singh_2026_CVPR,
  author    = {Singh, Kunwar Maheep and Chen, Jianchun and Golyanik, Vladislav and Garbin, Stephan J. and Beeler, Thabo and Dabral, Rishabh and Habermann, Marc and Theobalt, Christian},
  title     = {Relightable Holoported Characters: Capturing and Relighting Dynamic Human Performance from Sparse Views},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28882--28892},
}
GenBreak: Red Teaming Text-to-Image Generation Using Large Language Models: Zilong Wang,

Xiang Zheng,

Xiaosen Wang,

Bo Wang,

Xingjun Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zilong and Zheng, Xiang and Wang, Xiaosen and Wang, Bo and Ma, Xingjun},
  title     = {{GenBreak}: Red Teaming Text-to-Image Generation Using Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15730--15739},
}
Selfi: Self-improving Reconstruction Engine via 3D Geometric Feature Alignment: Youming Deng,

Songyou Peng,

Junyi Zhang,

Kathryn Heal,

Tiancheng Sun,

John Flynn,

Steve Marschner,

Lucy Chai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Youming and Peng, Songyou and Zhang, Junyi and Heal, Kathryn and Sun, Tiancheng and Flynn, John and Marschner, Steve and Chai, Lucy},
  title     = {{Selfi}: Self-improving Reconstruction Engine via {3D} Geometric Feature Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7351--7361},
}
Token Reduction via Local and Global Contexts Optimization for Efficient Video Large Language Models: Jinlong Li,

Liyuan Jiang,

Haonan Zhang,

Nicu Sebe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jinlong and Jiang, Liyuan and Zhang, Haonan and Sebe, Nicu},
  title     = {Token Reduction via Local and Global Contexts Optimization for Efficient Video Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10451--10461},
}
Match-and-Fuse: Consistent Generation from Unstructured Image Sets: Kate Feingold,

Omri Kaduri,

Tali Dekel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feingold_2026_CVPR,
  author    = {Feingold, Kate and Kaduri, Omri and Dekel, Tali},
  title     = {{Match-and-Fuse}: Consistent Generation from Unstructured Image Sets},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30490--30499},
}
Denoise and Align: Towards Source-Free UDA for Robust Panoramic Semantic Segmentation: Yaowen Chang,

Zhen Cao,

Xu Zheng,

Xiaoxin Mi,

Zhen Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chang_2026_CVPR,
  author    = {Chang, Yaowen and Cao, Zhen and Zheng, Xu and Mi, Xiaoxin and Dong, Zhen},
  title     = {Denoise and Align: Towards Source-Free {UDA} for Robust Panoramic Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32226--32235},
}
Decoupling Defense Strategies for Robust Image Watermarking: Jiahui Chen,

Zehang Deng,

Zeyu Zhang,

Chaoyang Li,

Lianchen Jia,

Lifeng Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jiahui and Deng, Zehang and Zhang, Zeyu and Li, Chaoyang and Jia, Lianchen and Sun, Lifeng},
  title     = {Decoupling Defense Strategies for Robust Image Watermarking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3316--3325},
}
What Your Features Reveal: Data-Efficient Black-Box Feature Inversion Attack for Split DNNs: Zhihan Ren,

Lijun He,

Jiaxi Liang,

Xinzhu Fu,

Haixia Bi,

Fan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Zhihan and He, Lijun and Liang, Jiaxi and Fu, Xinzhu and Bi, Haixia and Li, Fan},
  title     = {What Your Features Reveal: Data-Efficient Black-Box Feature Inversion Attack for Split {DNNs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13366--13375},
}
OmniFM: Toward Modality-Robust and Task-Agnostic Federated Learning for Heterogeneous Medical Imaging: Meilin Liu,

Jiaying Wang,

Jing Shan; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Meilin and Wang, Jiaying and Shan, Jing},
  title     = {{OmniFM}: Toward Modality-Robust and Task-Agnostic Federated Learning for Heterogeneous Medical Imaging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21099--21109},
}
tttLRM: Test-Time Training for Long Context and Autoregressive 3D Reconstruction: Chen Wang,

Hao Tan,

Wang Yifan,

Zhiqin Chen,

Yuheng Liu,

Kalyan Sunkavalli,

Sai Bi,

Lingjie Liu,

Yiwei Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chen and Tan, Hao and Yifan, Wang and Chen, Zhiqin and Liu, Yuheng and Sunkavalli, Kalyan and Bi, Sai and Liu, Lingjie and Hu, Yiwei},
  title     = {{tttLRM}: Test-Time Training for Long Context and Autoregressive {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36582--36592},
}
Orthogonal Spatial-Aware Multi-View Anchor Graph Clustering for Incomplete Remote Sensing Data: Yongshan Zhang,

Xiaohuan Lin,

Lefei Zhang,

Zhihua Cai; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yongshan and Lin, Xiaohuan and Zhang, Lefei and Cai, Zhihua},
  title     = {Orthogonal Spatial-Aware Multi-View Anchor Graph Clustering for Incomplete Remote Sensing Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20532--20541},
}
Designing Instance-Level Sampling Schedules via REINFORCE with James-Stein Shrinkage: Peiyu Yu,

Suraj Kothawade,

Sirui Xie,

Ying Nian Wu,

Hongliang Fei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Peiyu and Kothawade, Suraj and Xie, Sirui and Wu, Ying Nian and Fei, Hongliang},
  title     = {Designing Instance-Level Sampling Schedules via {REINFORCE} with {James-Stein} Shrinkage},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36040--36050},
}
MVInverse: Feed-forward Multiview Inverse Rendering in Seconds: Xiangzuo Wu,

Chengwei Ren,

Jun Zhou,

Xiu Li,

Yuan Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xiangzuo and Ren, Chengwei and Zhou, Jun and Li, Xiu and Liu, Yuan},
  title     = {{MVInverse}: Feed-forward Multiview Inverse Rendering in Seconds},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37343--37357},
}
MM-ACT: Learn from Multimodal Parallel Generation to Act: Haotian Liang,

Xinyi Chen,

Bin Wang,

Mingkang Chen,

Yitian Liu,

Yuhao Zhang,

Zanxin Chen,

Tianshuo Yang,

Yilun Chen,

Jiangmiao Pang,

Dong Liu,

Xiaokang Yang,

Yao Mu,

Wenqi Shao,

Ping Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Haotian and Chen, Xinyi and Wang, Bin and Chen, Mingkang and Liu, Yitian and Zhang, Yuhao and Chen, Zanxin and Yang, Tianshuo and Chen, Yilun and Pang, Jiangmiao and Liu, Dong and Yang, Xiaokang and Mu, Yao and Shao, Wenqi and Luo, Ping},
  title     = {{MM-ACT}: Learn from Multimodal Parallel Generation to Act},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35080--35090},
}
Rethinking Cross-Modal Anchor Alignment for Mitigating Error Accumulation: Bin Liu,

Wei Sun,

Qianqian Wang,

Wei Feng,

Yijie Chen,

Haixi Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Bin and Sun, Wei and Wang, Qianqian and Feng, Wei and Chen, Yijie and Zhang, Haixi},
  title     = {Rethinking Cross-Modal Anchor Alignment for Mitigating Error Accumulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8664--8673},
}
CountGD++: Generalized Prompting for Open-World Counting: Niki Amini-Naieni,

Andrew Zisserman; [pdf] [supp]
[bibtex]
@InProceedings{Amini-Naieni_2026_CVPR,
  author    = {Amini-Naieni, Niki and Zisserman, Andrew},
  title     = {{CountGD++}: Generalized Prompting for Open-World Counting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37725--37734},
}
Training-free Detection of Generated Videos via Spatial-Temporal Likelihoods: Omer Ben Hayun,

Roy Betser,

Meir Yossef Levi,

Levi Kassel,

Guy Gilboa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ben_Hayun_2026_CVPR,
  author    = {Ben Hayun, Omer and Betser, Roy and Levi, Meir Yossef and Kassel, Levi and Gilboa, Guy},
  title     = {Training-free Detection of Generated Videos via Spatial-Temporal Likelihoods},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16299--16310},
}
PRISM: Video Dataset Condensation with Progressive Refinement and Insertion for Sparse Motion: Jaehyun Choi,

Jiwan Hur,

Gyojin Han,

Jaemyung Yu,

Junmo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Jaehyun and Hur, Jiwan and Han, Gyojin and Yu, Jaemyung and Kim, Junmo},
  title     = {{PRISM}: Video Dataset Condensation with Progressive Refinement and Insertion for Sparse Motion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26348--26357},
}
Interpretable Debiasing of Vision-Language Models for Social Fairness: Na Min An,

Yoonna Jang,

Yusuke Hirota,

Ryo Hachiuma,

Isabelle Augenstein,

Hyunjung Shim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2026_CVPR,
  author    = {An, Na Min and Jang, Yoonna and Hirota, Yusuke and Hachiuma, Ryo and Augenstein, Isabelle and Shim, Hyunjung},
  title     = {Interpretable Debiasing of Vision-Language Models for Social Fairness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39326--39337},
}
D3D-VLP: Dynamic 3D Vision-Language-Planning Model for Embodied Grounding and Navigation: Zihan Wang,

Seungjun Lee,

Guangzhao Dai,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zihan and Lee, Seungjun and Dai, Guangzhao and Lee, Gim Hee},
  title     = {{D3D-VLP}: Dynamic {3D} Vision-Language-Planning Model for Embodied Grounding and Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32463--32474},
}
WorldReel: 4D Video Generation with Consistent Geometry and Motion Modeling: Shaoheng Fang,

Hanwen Jiang,

Yunpeng Bai,

Niloy J. Mitra,

Qixing Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Shaoheng and Jiang, Hanwen and Bai, Yunpeng and Mitra, Niloy J. and Huang, Qixing},
  title     = {{WorldReel}: {4D} Video Generation with Consistent Geometry and Motion Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11195--11206},
}
Property-Informed Diffusion-Based Text-to-Microstructure Generation: Bingxuan Dai,

Hongsong Wang,

Jie Gui; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Bingxuan and Wang, Hongsong and Gui, Jie},
  title     = {Property-Informed Diffusion-Based Text-to-Microstructure Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36758--36768},
}
Off The Grid: Detection of Primitives for Feed-Forward 3D Gaussian Splatting: Arthur Moreau,

Richard Shaw,

Michal Nazarczuk,

Jisu Shin,

Thomas Tanay,

Zhensong Zhang,

Songcen Xu,

Eduardo Pérez-Pellitero; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moreau_2026_CVPR,
  author    = {Moreau, Arthur and Shaw, Richard and Nazarczuk, Michal and Shin, Jisu and Tanay, Thomas and Zhang, Zhensong and Xu, Songcen and P{\'e}rez-Pellitero, Eduardo},
  title     = {Off The Grid: Detection of Primitives for Feed-Forward {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11756--11766},
}
Improving Diffusion Generalization with Weak-to-Strong Segmented Guidance: Liangyu Yuan,

Yufei Huang,

Mingkun Lei,

Tong Zhao,

Ruoyu Wang,

Changxi Chi,

Yiwei Wang,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Liangyu and Huang, Yufei and Lei, Mingkun and Zhao, Tong and Wang, Ruoyu and Chi, Changxi and Wang, Yiwei and Zhang, Chi},
  title     = {Improving Diffusion Generalization with Weak-to-Strong Segmented Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43697--43706},
}
IDperturb: Enhancing Variation in Synthetic Face Generation via Angular Perturbations: Fadi Boutros,

Eduarda Caldeira,

Tahar Chettaoui,

Naser Damer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Boutros_2026_CVPR,
  author    = {Boutros, Fadi and Caldeira, Eduarda and Chettaoui, Tahar and Damer, Naser},
  title     = {{IDperturb}: Enhancing Variation in Synthetic Face Generation via Angular Perturbations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40119--40129},
}
DMGD: Train-Free Dataset Distillation with Semantic-Distribution Matching in Diffusion Models: Qichao Wang,

Yunhong Lu,

Hengyuan Cao,

Junyi Zhang,

Min Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Qichao and Lu, Yunhong and Cao, Hengyuan and Zhang, Junyi and Zhang, Min},
  title     = {{DMGD}: Train-Free Dataset Distillation with Semantic-Distribution Matching in Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12417--12427},
}
CamPI: Physical Adversarial Examples through Camera Power Signal Injection: Yanze Ren,

Mingyuan Lv,

Qinhong Jiang,

Yan Jiang,

Chen Yan,

Xiaoyu Ji,

Wenyuan Xu; [pdf] [supp]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Yanze and Lv, Mingyuan and Jiang, Qinhong and Jiang, Yan and Yan, Chen and Ji, Xiaoyu and Xu, Wenyuan},
  title     = {{CamPI}: Physical Adversarial Examples through Camera Power Signal Injection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6611--6620},
}
FlowHijack: A Dynamics-Aware Backdoor Attack on Flow-Matching Vision-Language-Action Models: Xinyuan An,

Tao Luo,

Gengyun Peng,

Yaobing Wang,

Kui Ren,

Dongxia Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{An_2026_CVPR,
  author    = {An, Xinyuan and Luo, Tao and Peng, Gengyun and Wang, Yaobing and Ren, Kui and Wang, Dongxia},
  title     = {{FlowHijack}: A Dynamics-Aware Backdoor Attack on Flow-Matching Vision-Language-Action Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22879--22888},
}
Learning Compact 3D Representations from Feed-Forward Novel View Synthesis: Honggyu An,

Jaewoo Jung,

Mungyeom Kim,

Chaehyun Kim,

Minkyeong Jeon,

Jisang Han,

Kazumi Fukuda,

Takuya Narihira,

Hyunah Ko,

Junsu Kim,

Sunghwan Hong,

Yuki Mitsufuji,

Seungryong Kim; [pdf] [supp]
[bibtex]
@InProceedings{An_2026_CVPR,
  author    = {An, Honggyu and Jung, Jaewoo and Kim, Mungyeom and Kim, Chaehyun and Jeon, Minkyeong and Han, Jisang and Fukuda, Kazumi and Narihira, Takuya and Ko, Hyunah and Kim, Junsu and Hong, Sunghwan and Mitsufuji, Yuki and Kim, Seungryong},
  title     = {Learning Compact {3D} Representations from Feed-Forward Novel View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {363--373},
}
ShowUI-p: Flow-based Generative Models as GUI Dexterous Hands: Siyuan Hu,

Kevin Qinghong Lin,

Mike Zheng Shou; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Siyuan and Lin, Kevin Qinghong and Shou, Mike Zheng},
  title     = {{ShowUI-p}: Flow-based Generative Models as {GUI} Dexterous Hands},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8130--8140},
}
How Far Can We Go With Synthetic Data for Audio-Visual Sound Source Localization?: Arda Senocak,

Sooyoung Park,

Tae-Hyun Oh,

Joon Son Chung; [pdf] [supp]
[bibtex]
@InProceedings{Senocak_2026_CVPR,
  author    = {Senocak, Arda and Park, Sooyoung and Oh, Tae-Hyun and Chung, Joon Son},
  title     = {How Far Can We Go With Synthetic Data for Audio-Visual Sound Source Localization?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22931--22940},
}
AMusE: Audio-Visual Benchmark and Alignment Framework for Agentic Multi-Speaker Understanding: Sanjoy Chowdhury,

Karren D Yang,

Xudong Liu,

Fartash Faghri,

Pavan Kumar Anasosalu Vasu,

Oncel Tuzel,

Dinesh Manocha,

Chun-Liang Li,

Raviteja Vemulapalli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chowdhury_2026_CVPR,
  author    = {Chowdhury, Sanjoy and Yang, Karren D and Liu, Xudong and Faghri, Fartash and Vasu, Pavan Kumar Anasosalu and Tuzel, Oncel and Manocha, Dinesh and Li, Chun-Liang and Vemulapalli, Raviteja},
  title     = {{AMusE}: Audio-Visual Benchmark and Alignment Framework for Agentic Multi-Speaker Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22998--23009},
}
Global-Graph Guided and Local-Graph Weighted Contrastive Learning for Unified Clustering on Incomplete and Noise Multi-View Data: Hongqing He,

Jie Xu,

Wenyuan Yang,

Yonghua Zhu,

Guoqiu Wen,

Xiaofeng Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Hongqing and Xu, Jie and Yang, Wenyuan and Zhu, Yonghua and Wen, Guoqiu and Zhu, Xiaofeng},
  title     = {Global-Graph Guided and Local-Graph Weighted Contrastive Learning for Unified Clustering on Incomplete and Noise Multi-View Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24813--24822},
}
GeoDiT: A Diffusion-based Vision-Language Model for Geospatial Understanding: Jiaqi Liu,

Ronghao Fu,

Haoran Liu,

Lang Sun,

Qipeng Wang,

Bo Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jiaqi and Fu, Ronghao and Liu, Haoran and Sun, Lang and Wang, Qipeng and Yang, Bo},
  title     = {{GeoDiT}: A Diffusion-based Vision-Language Model for Geospatial Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20608--20618},
}
ReFlow: Self-correction Motion Learning for Dynamic Scene Reconstruction: Yanzhe Liang,

Ruijie Zhu,

Hanzhi Chang,

Zhuoyuan Li,

Jiahao Lu,

Tianzhu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Yanzhe and Zhu, Ruijie and Chang, Hanzhi and Li, Zhuoyuan and Lu, Jiahao and Zhang, Tianzhu},
  title     = {{ReFlow}: Self-correction Motion Learning for Dynamic Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29803--29813}
}
Is Bin Generation Indispensable? A Bin-Generation-Free Dataset Quantization via Semantic Perspective: Maijie Deng,

Yuhua Li,

Yixiong Zou,

Yao Wu,

Chenru Ma; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Maijie and Li, Yuhua and Zou, Yixiong and Wu, Yao and Ma, Chenru},
  title     = {Is Bin Generation Indispensable? A Bin-Generation-Free Dataset Quantization via Semantic Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33642--33651}
}
CaTok: Taming Mean Flows for One-Dimensional Causal Image Tokenization: Yitong Chen,

Zuxuan Wu,

Xipeng Qiu,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yitong and Wu, Zuxuan and Qiu, Xipeng and Jiang, Yu-Gang},
  title     = {{CaTok}: Taming Mean Flows for One-Dimensional Causal Image Tokenization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23161--23171}
}
TrajRAG: Retrieving Geometric-Semantic Experience for Zero-Shot Object Navigation: Yiyao Wang,

Sixian Zhang,

Keming Zhang,

Xinhang Song,

Songjie Du,

Shuqiang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yiyao and Zhang, Sixian and Zhang, Keming and Song, Xinhang and Du, Songjie and Jiang, Shuqiang},
  title     = {{TrajRAG}: Retrieving Geometric-Semantic Experience for Zero-Shot Object Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15166--15176}
}
SimRecon: SimReady Compositional Scene Reconstruction from Real Videos: Chong Xia,

Kai Zhu,

Zizhuo Wang,

Fangfu Liu,

Zhizheng Zhang,

Yueqi Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Chong and Zhu, Kai and Wang, Zizhuo and Liu, Fangfu and Zhang, Zhizheng and Duan, Yueqi},
  title     = {{SimRecon}: {SimReady} Compositional Scene Reconstruction from Real Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42452--42463}
}
FontCrafter: High-Fidelity Element-Driven Artistic Font Creation with Visual In-Context Generation: Wuyang Luo,

Chengkai Tan,

Chang Ge,

Binye Hong,

Su Yang,

Yongjiu Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Wuyang and Tan, Chengkai and Ge, Chang and Hong, Binye and Yang, Su and Ma, Yongjiu},
  title     = {{FontCrafter}: High-Fidelity Element-Driven Artistic Font Creation with Visual In-Context Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {583--593}
}
From Corners to Fiducial Tags: Revisiting Checkerboard Calibration for Event Cameras: Taehun Ryu,

Changwoo Kang,

Kyungdon Joo; [pdf] [supp]
[bibtex]
@InProceedings{Ryu_2026_CVPR,
  author    = {Ryu, Taehun and Kang, Changwoo and Joo, Kyungdon},
  title     = {From Corners to Fiducial Tags: Revisiting Checkerboard Calibration for Event Cameras},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29557--29566}
}
Bridging Brain and Semantics: A Hierarchical Framework for Semantically Enhanced fMRI-to-Video Reconstruction: Yujie Wei,

Chenglong Ma,

Jianxiong Gao,

Chenhui Wang,

Shiwei Zhang,

Biao Gong,

Shuai Tan,

Hangjie Yuan,

Hongming Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Yujie and Ma, Chenglong and Gao, Jianxiong and Wang, Chenhui and Zhang, Shiwei and Gong, Biao and Tan, Shuai and Yuan, Hangjie and Shan, Hongming},
  title     = {Bridging Brain and Semantics: A Hierarchical Framework for Semantically Enhanced {fMRI}-to-Video Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28211--28223}
}
SceneTok: A Compressed, Diffusable Token Space for 3D Scenes: Mohammad Asim,

Christopher Wewer,

Jan Eric Lenssen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Asim_2026_CVPR,
  author    = {Asim, Mohammad and Wewer, Christopher and Lenssen, Jan Eric},
  title     = {{SceneTok}: A Compressed, Diffusable Token Space for {3D} Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5870--5880}
}
See and Fix the Flaws: Enabling VLMs and Diffusion Models to Comprehend Visual Artifacts via Agentic Data Synthesis: Jaehyun Park,

Minyoung Ahn,

Minkyu Kim,

Jonghyun Lee,

Jae-Gil Lee,

Dongmin Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Jaehyun and Ahn, Minyoung and Kim, Minkyu and Lee, Jonghyun and Lee, Jae-Gil and Park, Dongmin},
  title     = {See and Fix the Flaws: Enabling {VLMs} and Diffusion Models to Comprehend Visual Artifacts via Agentic Data Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35810--35820}
}
DRM: Diffusion-based Reward Model With Step-wise Guidance: Jaxon Zhang,

Binxin Yang,

Hubery Yin,

Chen Li,

Jing LYU; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jaxon and Yang, Binxin and Yin, Hubery and Li, Chen and LYU, Jing},
  title     = {{DRM}: Diffusion-based Reward Model With Step-wise Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12764--12774}
}
DreamShot: Personalized Storyboard Synthesis with Video Diffusion Prior: Junjia Huang,

Binbin Yang,

Pengxiang Yan,

Jiyang Liu,

Bin Xia,

Zhao Wang,

Yitong Wang,

Liang Lin,

Guanbin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Junjia and Yang, Binbin and Yan, Pengxiang and Liu, Jiyang and Xia, Bin and Wang, Zhao and Wang, Yitong and Lin, Liang and Li, Guanbin},
  title     = {{DreamShot}: Personalized Storyboard Synthesis with Video Diffusion Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36736--36746}
}
Gallant: Voxel Grid-based Humanoid Locomotion and Local-navigation across 3-D Constrained Terrains: Qingwei Ben,

Botian Xu,

Kailin Li,

Feiyu Jia,

Wentao Zhang,

Jingping Wang,

Jingbo Wang,

Dahua Lin,

Jiangmiao Pang; [pdf] [supp]
[bibtex]
@InProceedings{Ben_2026_CVPR,
  author    = {Ben, Qingwei and Xu, Botian and Li, Kailin and Jia, Feiyu and Zhang, Wentao and Wang, Jingping and Wang, Jingbo and Lin, Dahua and Pang, Jiangmiao},
  title     = {Gallant: Voxel Grid-based Humanoid Locomotion and Local-navigation across {3-D} Constrained Terrains},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28086--28095}
}
UniM: A Unified Any-to-Any Interleaved Multimodal Benchmark: Yanlin Li,

Minghui Guo,

Kaiwen Zhang,

Shize Zhang,

Yiran Zhao,

Haodong Li,

Congyue Zhou,

Weijie Zheng,

Yushen Yan,

Shengqiong Wu,

Wei Ji,

Lei Cui,

Furu Wei,

Hao Fei,

Mong-Li Lee,

Wynne Hsu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yanlin and Guo, Minghui and Zhang, Kaiwen and Zhang, Shize and Zhao, Yiran and Li, Haodong and Zhou, Congyue and Zheng, Weijie and Yan, Yushen and Wu, Shengqiong and Ji, Wei and Cui, Lei and Wei, Furu and Fei, Hao and Lee, Mong-Li and Hsu, Wynne},
  title     = {{UniM}: A Unified Any-to-Any Interleaved Multimodal Benchmark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15902--15911}
}
A Closer Look at Cross-Domain Few-Shot Object Detection: Fine-Tuning Matters and Parallel Decoder Helps: Xuanlong Yu,

Youyang Sha,

Longfei Liu,

Xi Shen,

Di Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Xuanlong and Sha, Youyang and Liu, Longfei and Shen, Xi and Yang, Di},
  title     = {A Closer Look at Cross-Domain Few-Shot Object Detection: Fine-Tuning Matters and Parallel Decoder Helps},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26593--26603}
}
Do Vision-Language Models Measure Up? Benchmarking Visual Measurement Reading with MeasureBench: Fenfen Lin,

Yesheng Liu,

Haiyu Xu,

Yue Chen,

Zheqi He,

Mingxuan Zhao,

Miguel Hu Chen,

Jin-Ge Yao,

Xi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Fenfen and Liu, Yesheng and Xu, Haiyu and Chen, Yue and He, Zheqi and Zhao, Mingxuan and Chen, Miguel Hu and Yao, Jin-Ge and Yang, Xi},
  title     = {Do Vision-Language Models Measure Up? Benchmarking Visual Measurement Reading with {MeasureBench}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38544--38553}
}
TruckDrive: Long-Range Autonomous Highway Driving Dataset: Filippo Ghilotti,

Edoardo Palladin,

Samuel Brucker,

Adam Sigal,

Mario Bijelic,

Felix Heide; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghilotti_2026_CVPR,
  author    = {Ghilotti, Filippo and Palladin, Edoardo and Brucker, Samuel and Sigal, Adam and Bijelic, Mario and Heide, Felix},
  title     = {{TruckDrive}: Long-Range Autonomous Highway Driving Dataset},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10587--10598}
}
Spe-BEVHead: Rethinking the Detection Head Design for Bird's-Eye-View Object Detection: Junshu Zhang,

Sicheng Zhao,

Xin Zhao,

Fan Yang,

Ruike Chen,

Jungong Han,

Guiguang Ding; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Junshu and Zhao, Sicheng and Zhao, Xin and Yang, Fan and Chen, Ruike and Han, Jungong and Ding, Guiguang},
  title     = {{Spe-BEVHead}: Rethinking the Detection Head Design for Bird's-Eye-View Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25799--25809}
}
MVP: Multiple View Prediction Improves GUI Grounding: Yunzhu Zhang,

Zeyu Pan,

Zhengwen Zeng,

Shuheng Shen,

Changhua Meng,

Linchao Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yunzhu and Pan, Zeyu and Zeng, Zhengwen and Shen, Shuheng and Meng, Changhua and Zhu, Linchao},
  title     = {{MVP}: Multiple View Prediction Improves {GUI} Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27482--27492}
}
Towards GUI Agents: Vision-Language Diffusion Models for GUI Grounding: Shrinidhi Kumbhar,

Haofu Liao,

Srikar Appalaraju,

Kunwar Yashraj Singh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kumbhar_2026_CVPR,
  author    = {Kumbhar, Shrinidhi and Liao, Haofu and Appalaraju, Srikar and Singh, Kunwar Yashraj},
  title     = {Towards {GUI} Agents: Vision-Language Diffusion Models for {GUI} Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27493--27502}
}
Compressed-Domain-Aware Online Video Super-Resolution: Yuhang Wang,

Hai Li,

Shujuan Hou,

Zhetao Dong,

Xiaoyao Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuhang and Li, Hai and Hou, Shujuan and Dong, Zhetao and Yang, Xiaoyao},
  title     = {Compressed-Domain-Aware Online Video Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33621--33631}
}
Multimodal Causality-Driven Representation Learning for Generalizable Medical Image Segmentation: Xusheng Liang,

Lihua Zhou,

Nianxin Li,

Miao Xu,

Ziyang Song,

Dong Yi,

Jinlin Wu,

Jiawei Ma,

Hongbin Liu,

Zhen Lei,

Jiebo Luo; [pdf] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Xusheng and Zhou, Lihua and Li, Nianxin and Xu, Miao and Song, Ziyang and Yi, Dong and Wu, Jinlin and Ma, Jiawei and Liu, Hongbin and Lei, Zhen and Luo, Jiebo},
  title     = {Multimodal Causality-Driven Representation Learning for Generalizable Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13670--13679}
}
Efficient Hybrid SE(3)-Equivariant Visuomotor Flow Policy via Spherical Harmonics for Robot Manipulation: Qinglun Zhang,

Shen Cheng,

Tian Dan,

Haoqiang Fan,

Guanghui Liu,

Shuaicheng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Qinglun and Cheng, Shen and Dan, Tian and Fan, Haoqiang and Liu, Guanghui and Liu, Shuaicheng},
  title     = {Efficient Hybrid {SE(3)}-Equivariant Visuomotor Flow Policy via Spherical Harmonics for Robot Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27989--27998}
}
PAD-Hand: Physics-Aware Diffusion for Hand Motion Recovery: Elkhan Ismayilzada,

Yufei Zhang,

Zijun Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ismayilzada_2026_CVPR,
  author    = {Ismayilzada, Elkhan and Zhang, Yufei and Cui, Zijun},
  title     = {{PAD-Hand}: Physics-Aware Diffusion for Hand Motion Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28358--28368}
}
SVBench: Evaluation of Video Generation Models on Social Reasoning: Wenshuo Peng,

Gongxuan Wang,

Tianmeng Yang,

Chuanhao Li,

Xiaojie Xu,

Hui He,

Kaipeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Wenshuo and Wang, Gongxuan and Yang, Tianmeng and Li, Chuanhao and Xu, Xiaojie and He, Hui and Zhang, Kaipeng},
  title     = {{SVBench}: Evaluation of Video Generation Models on Social Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32872--32881}
}
GeoFlow: Real-Time Fine-Grained Cross-View Geolocalization via Iterative Flow Prediction: Ayesh Abu Lehyeh,

Xiaohan Zhang,

Ahmad Arrabi,

Waqas Sultani,

Chen Chen,

Safwan Wshah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Abu_Lehyeh_2026_CVPR,
  author    = {Abu Lehyeh, Ayesh and Zhang, Xiaohan and Arrabi, Ahmad and Sultani, Waqas and Chen, Chen and Wshah, Safwan},
  title     = {{GeoFlow}: Real-Time Fine-Grained Cross-View Geolocalization via Iterative Flow Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5369--5378}
}
Scaling Agentic Reinforcement Learning for Tool-Integrated Reasoning in VLMs: Meng Lu,

Ran Xu,

Yi Fang,

Wenxuan Zhang,

Yue Yu,

Gaurav Srivastava,

Yuchen Zhuang,

Mohamed Elhoseiny,

Charles Fleming,

Carl Yang,

Zhengzhong Tu,

Yang Xie,

Guanghua Xiao,

Di Jin,

Wenqi Shi,

Xuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Meng and Xu, Ran and Fang, Yi and Zhang, Wenxuan and Yu, Yue and Srivastava, Gaurav and Zhuang, Yuchen and Elhoseiny, Mohamed and Fleming, Charles and Yang, Carl and Tu, Zhengzhong and Xie, Yang and Xiao, Guanghua and Jin, Di and Shi, Wenqi and Wang, Xuan},
  title     = {Scaling Agentic Reinforcement Learning for Tool-Integrated Reasoning in {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26518--26529}
}
TESO: Online Tracking of Essential Matrix by Stochastic Optimization: Jaroslav Moravec,

Radim Sara,

Akihiro Sugimoto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moravec_2026_CVPR,
  author    = {Moravec, Jaroslav and Sara, Radim and Sugimoto, Akihiro},
  title     = {{TESO}: Online Tracking of Essential Matrix by Stochastic Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28819--28827}
}
AGENTSAFE: Benchmarking the Safety of Embodied Agents on Hazardous Instructions: Zonghao Ying,

Le Wang,

Yisong Xiao,

Jiakai Wang,

Yuqing Ma,

Jinyang Guo,

Zhenfei Yin,

Mingchuan Zhang,

Aishan Liu,

Xianglong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ying_2026_CVPR,
  author    = {Ying, Zonghao and Wang, Le and Xiao, Yisong and Wang, Jiakai and Ma, Yuqing and Guo, Jinyang and Yin, Zhenfei and Zhang, Mingchuan and Liu, Aishan and Liu, Xianglong},
  title     = {{AGENTSAFE}: Benchmarking the Safety of Embodied Agents on Hazardous Instructions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37664--37673}
}
RealBirdID: Benchmarking Bird Species Identification in the Era of MLLMs: Logan Lawrence,

Oindrila Saha,

Rangel Daroya,

Mustafa Chasmai,

Wuao Liu,

Max Hamilton,

Aaron Sun,

Seoyun Jeong,

Fabien Delattre,

Subhransu Maji,

Grant Van Horn; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lawrence_2026_CVPR,
  author    = {Lawrence, Logan and Saha, Oindrila and Daroya, Rangel and Chasmai, Mustafa and Liu, Wuao and Hamilton, Max and Sun, Aaron and Jeong, Seoyun and Delattre, Fabien and Maji, Subhransu and Van Horn, Grant},
  title     = {{RealBirdID}: Benchmarking Bird Species Identification in the Era of {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2445--2456}
}
Diff4Splat: Repurposing Video Diffusion Models for Dynamic Scene Generation: Panwang Pan,

Chenguo Lin,

Chenxin Li,

Jingjing Zhao,

Yuchen Lin,

Haopeng Li,

Yunlong Lin,

Kairun Wen,

Yixuan Yuan,

Yadong MU; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Panwang and Lin, Chenguo and Li, Chenxin and Zhao, Jingjing and Lin, Yuchen and Li, Haopeng and Lin, Yunlong and Wen, Kairun and Yuan, Yixuan and MU, Yadong},
  title     = {{Diff4Splat}: Repurposing Video Diffusion Models for Dynamic Scene Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4232--4244}
}
DiT-Distill: Open-Set Fine-Grained Retrieval via Generative Curriculum Knowledge: Xin Jiang,

Hao Tang,

Meiqi Cao,

Junyao Gao,

Fei Shen,

Zechao Li; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Xin and Tang, Hao and Cao, Meiqi and Gao, Junyao and Shen, Fei and Li, Zechao},
  title     = {{DiT-Distill}: Open-Set Fine-Grained Retrieval via Generative Curriculum Knowledge},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38753--38762}
}
Unleashing VLA Potentials in Autonomous Driving via Explicit Learning from Failures: Yuechen Luo,

Fang Li,

Qimao Chen,

Shaoqing Xu,

Jiaxin Liu,

Ziying Song,

Zhi-xin Yang,

Fuxi Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Yuechen and Li, Fang and Chen, Qimao and Xu, Shaoqing and Liu, Jiaxin and Song, Ziying and Yang, Zhi-xin and Wen, Fuxi},
  title     = {Unleashing {VLA} Potentials in Autonomous Driving via Explicit Learning from Failures},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24833--24842}
}
X-WIN: Building Chest Radiograph World Model via Predictive Sensing: Zefan Yang,

Ge Wang,

James Hendler,

Mannudeep K. Kalra,

Pingkun Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zefan and Wang, Ge and Hendler, James and Kalra, Mannudeep K. and Yan, Pingkun},
  title     = {{X-WIN}: Building Chest Radiograph World Model via Predictive Sensing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6920--6930}
}
OPRO: Orthogonal Panel-Relative Operators for Panel-Aware In-Context Image Generation: Sanghyeon Lee,

Minwoo Lee,

Euijin Shin,

Kangyeol Kim,

Seunghwan Choi,

Jaegul Choo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Sanghyeon and Lee, Minwoo and Shin, Euijin and Kim, Kangyeol and Choi, Seunghwan and Choo, Jaegul},
  title     = {{OPRO}: Orthogonal Panel-Relative Operators for Panel-Aware In-Context Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9233--9242}
}
Submodel Extraction for Efficient and Personalized Federated Learning via Optimal Transport: Zheng Jiang,

Nan He,

Yiming Chen,

Lifeng Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Zheng and He, Nan and Chen, Yiming and Sun, Lifeng},
  title     = {Submodel Extraction for Efficient and Personalized Federated Learning via Optimal Transport},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3369--3378}
}
Improving Controllable Generation: Faster Training and Better Performance via x0-Supervision: Amadou S. Sangare,

Adrien Maglo,

Mohamed Chaouch,

Bertrand Luvison; [pdf] [supp]
[bibtex]
@InProceedings{Sangare_2026_CVPR,
  author    = {Sangare, Amadou S. and Maglo, Adrien and Chaouch, Mohamed and Luvison, Bertrand},
  title     = {Improving Controllable Generation: Faster Training and Better Performance via x0-Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9106--9115}
}
Rethinking Visual Rearrangement from A Diffusion Perspective: Tianliang Qi,

Xinhang Song,

Yuyi Liu,

Shuqiang Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Tianliang and Song, Xinhang and Liu, Yuyi and Jiang, Shuqiang},
  title     = {Rethinking Visual Rearrangement from A Diffusion Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15222--15231}
}
ClimaOoD: Improving Anomaly Segmentation via Physically Realistic Synthetic Data: Yuxing Liu,

Zheng Li,

Huanhuan Liang,

Ji Zhang,

Zeyu Sun,

Yong Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuxing and Li, Zheng and Liang, Huanhuan and Zhang, Ji and Sun, Zeyu and Liu, Yong},
  title     = {{ClimaOoD}: Improving Anomaly Segmentation via Physically Realistic Synthetic Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17853--17862}
}
Multi-View Hierarchical Alignment Learning for Spatial Transcriptomics: Zhengzhong Zhu,

Liangjin Liu,

Pei Zhou,

Shiquan Min,

Jiangping Zhu; [pdf]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Zhengzhong and Liu, Liangjin and Zhou, Pei and Min, Shiquan and Zhu, Jiangping},
  title     = {Multi-View Hierarchical Alignment Learning for Spatial Transcriptomics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26783--26792}
}
AdaIAT: Adaptively Increasing Attention to Generated Text to Alleviate Hallucinations in LVLM: Li'an Zhong,

Ziqiang He,

Jibin Zheng,

Jin Li,

Z. Jane Wang,

Xiangui Kang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Li'an and He, Ziqiang and Zheng, Jibin and Li, Jin and Wang, Z. Jane and Kang, Xiangui},
  title     = {{AdaIAT}: Adaptively Increasing Attention to Generated Text to Alleviate Hallucinations in {LVLM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11076--11085}
}
Visual-RRT: Finding Paths toward Visual-Goals via Differentiable Rendering: Sebin Lee,

Jumin Lee,

Taeyeon Kim,

Youngju Na,

Woobin Im,

Sung-Eui Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Sebin and Lee, Jumin and Kim, Taeyeon and Na, Youngju and Im, Woobin and Yoon, Sung-Eui},
  title     = {{Visual-RRT}: Finding Paths toward Visual-Goals via Differentiable Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13486--13495}
}
Action-Sketcher: From Reasoning to Action via Visual Sketches for Robotic Manipulation: Huajie Tan,

Peterson Co,

Yijie Xu,

Shanyu Rong,

Yuheng Ji,

Cheng Chi,

Xiansheng Chen,

Zhongxia Zhao,

Pengwei Wang,

Zhongyuan Wang,

Shanghang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Huajie and Co, Peterson and Xu, Yijie and Rong, Shanyu and Ji, Yuheng and Chi, Cheng and Chen, Xiansheng and Zhao, Zhongxia and Wang, Pengwei and Wang, Zhongyuan and Zhang, Shanghang},
  title     = {{Action-Sketcher}: From Reasoning to Action via Visual Sketches for Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22433--22444}
}
Mixture-of-Experts based Feature Decoupling for Open Vocabulary Scene Graph Generation: Yiming Li,

Sisi You,

Bing-Kun Bao; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yiming and You, Sisi and Bao, Bing-Kun},
  title     = {Mixture-of-Experts based Feature Decoupling for Open Vocabulary Scene Graph Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32206--32215}
}
FAPE-IR: Frequency-Aware Planning and Execution Framework for All-in-One Image Restoration: Jingren Liu,

Shuning Xu,

Qirui Yang,

Yun Wang,

Xiangyu Chen,

Zhong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jingren and Xu, Shuning and Yang, Qirui and Wang, Yun and Chen, Xiangyu and Ji, Zhong},
  title     = {{FAPE-IR}: Frequency-Aware Planning and Execution Framework for All-in-One Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15561--15573}
}
GP-4DGS: Probabilistic 4D Gaussian Splatting from Monocular Video via Variational Gaussian Processes: Mijeong Kim,

Jungtaek Kim,

Bohyung Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Mijeong and Kim, Jungtaek and Han, Bohyung},
  title     = {{GP-4DGS}: Probabilistic {4D} {Gaussian} Splatting from Monocular Video via Variational {Gaussian} Processes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33394--33403}
}
Asking like Socrates: Socrates helps VLMs understand remote sensing images: Run Shao,

Ziyu Li,

Zhaoyang Zhang,

Linrui Xu,

Xinran He,

Hongyuan Yuan,

Bolei He,

Yongxing Dai,

Yiming Yan,

Yijun Chen,

Wang Guo,

Haifeng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Run and Li, Ziyu and Zhang, Zhaoyang and Xu, Linrui and He, Xinran and Yuan, Hongyuan and He, Bolei and Dai, Yongxing and Yan, Yiming and Chen, Yijun and Guo, Wang and Li, Haifeng},
  title     = {Asking like {Socrates}: {Socrates} helps {VLMs} understand remote sensing images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26465--26475}
}
EgoSound: Benchmarking Sound Understanding in Egocentric Videos: Bingwen Zhu,

Yuqian Fu,

Qiaole Dong,

Guolei Sun,

Tianwen Qian,

Yuzheng Wu,

Danda Pani Paudel,

Yanwei Fu,

Xiangyang Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Bingwen and Fu, Yuqian and Dong, Qiaole and Sun, Guolei and Qian, Tianwen and Wu, Yuzheng and Paudel, Danda Pani and Fu, Yanwei and Xue, Xiangyang},
  title     = {{EgoSound}: Benchmarking Sound Understanding in Egocentric Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25589--25598}
}
STiTch: Semantic Transition and Transportation in Collaboration for Training-Free Zero-Shot Composed Image Retrieval: Miaoge Li,

Dongsheng Wang,

Zening Sun,

Jinsen Zhang,

Wenhan Luo,

Jingcai Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Miaoge and Wang, Dongsheng and Sun, Zening and Zhang, Jinsen and Luo, Wenhan and Guo, Jingcai},
  title     = {{STiTch}: Semantic Transition and Transportation in Collaboration for Training-Free Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12374--12384}
}
Language-guided Frequency Modulation for Large Vision-Language Models: Shuyi Ouyang,

Gongfan Fang,

Xinyin Ma,

Yen-Wei Chen,

Lanfen Lin,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ouyang_2026_CVPR,
  author    = {Ouyang, Shuyi and Fang, Gongfan and Ma, Xinyin and Chen, Yen-Wei and Lin, Lanfen and Wang, Xinchao},
  title     = {Language-guided Frequency Modulation for Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39175--39185}
}
Masked Representation Modeling for Domain-Adaptive Segmentation: Wenlve Zhou,

Zhiheng Zhou,

Tiantao Xian,

Yikui Zhai,

Weibin Wu,

Biyun MA; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Wenlve and Zhou, Zhiheng and Xian, Tiantao and Zhai, Yikui and Wu, Weibin and MA, Biyun},
  title     = {Masked Representation Modeling for Domain-Adaptive Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36999--37009}
}
Test-Time Attention Purification for Backdoored Large Vision Language Models: Zhifang Zhang,

Bojun Yang,

Shuo He,

Weitong Chen,

Wei Emma Zhang,

Olaf Maennel,

Lei Feng,

Miao Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zhifang and Yang, Bojun and He, Shuo and Chen, Weitong and Zhang, Wei Emma and Maennel, Olaf and Feng, Lei and Xu, Miao},
  title     = {Test-Time Attention Purification for Backdoored Large Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22826--22835}
}
SCoRe: Salience-Coverage Reduction for Vision Token Pruning in Vision-Language Models: Tong Xu,

Hailong Shi,

Xingyu Gao; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Tong and Shi, Hailong and Gao, Xingyu},
  title     = {{SCoRe}: Salience-Coverage Reduction for Vision Token Pruning in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24686--24695}
}
Hear What Matters! Text-conditioned Selective Video-to-Audio Generation: Junwon Lee,

Juhan Nam,

Jiyoung Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Junwon and Nam, Juhan and Lee, Jiyoung},
  title     = {Hear What Matters! {Text-conditioned} Selective Video-to-Audio Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36680--36690},
}
Cross-Modal Guided Visual Synthesis for Data-Efficient Multimodal Depression Recognition: Shanliang Yang,

Xiaoxiao Wang; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Shanliang and Wang, Xiaoxiao},
  title     = {Cross-Modal Guided Visual Synthesis for Data-Efficient Multimodal Depression Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15933--15943},
}
Exploring 6D Object Pose Estimation with Deformation: Zhiqiang Liu,

Rui Song,

Duanmu Chuangqi,

Jiaojiao Li,

David Ferstl,

Yinlin Hu; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zhiqiang and Song, Rui and Chuangqi, Duanmu and Li, Jiaojiao and Ferstl, David and Hu, Yinlin},
  title     = {Exploring {6D} Object Pose Estimation with Deformation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33078--33087},
}
Role-SynthCLIP: A Role-Play Driven Diverse Synthetic Data Approach: Yuanxiang Huangfu,

Chaochao Wang,

Weilei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huangfu_2026_CVPR,
  author    = {Huangfu, Yuanxiang and Wang, Chaochao and Wang, Weilei},
  title     = {{Role-SynthCLIP}: A Role-Play Driven Diverse Synthetic Data Approach},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10142--10151},
}
MAPo: Motion-Aware Partitioning of Deformable 3D Gaussian Splatting for High-Fidelity Dynamic Scene Reconstruction: Han Jiao,

Jiakai Sun,

Yexing Xu,

Lei Zhao,

Wei Xing,

Huaizhong Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiao_2026_CVPR,
  author    = {Jiao, Han and Sun, Jiakai and Xu, Yexing and Zhao, Lei and Xing, Wei and Lin, Huaizhong},
  title     = {{MAPo}: Motion-Aware Partitioning of Deformable {3D} {Gaussian} Splatting for High-Fidelity Dynamic Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11767--11776},
}
D-Prism: Differentiable Primitives for Structured Dynamic Modeling: Xingyuan Yu,

Yijin Li,

Chong Zeng,

Yuhang Ming,

Hujun Bao,

Guofeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Xingyuan and Li, Yijin and Zeng, Chong and Ming, Yuhang and Bao, Hujun and Zhang, Guofeng},
  title     = {{D-Prism}: Differentiable Primitives for Structured Dynamic Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7556--7566},
}
Perceiving the Near, Reasoning the Distant: Coherent Long-Horizon Trajectory Prediction for Autonomous Driving: Hua Hu,

Zikang Zhou,

Qian Zhou,

Zihao Wen,

Junjie Hu,

Xinhong Chen,

Zhengmin Jiang,

Yung-Hui Li,

Jianping Wang; [pdf]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Hua and Zhou, Zikang and Zhou, Qian and Wen, Zihao and Hu, Junjie and Chen, Xinhong and Jiang, Zhengmin and Li, Yung-Hui and Wang, Jianping},
  title     = {Perceiving the Near, Reasoning the Distant: Coherent Long-Horizon Trajectory Prediction for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24875--24884},
}
DRS-GUI: Dynamic Region Search for Training-Free GUI Grounding: Yichao Liu,

Huawen Shen,

Liu Yu,

Shiyu Liu,

Zeyu Chen,

Yu Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yichao and Shen, Huawen and Yu, Liu and Liu, Shiyu and Chen, Zeyu and Zhou, Yu},
  title     = {{DRS-GUI}: Dynamic Region Search for Training-Free {GUI} Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34606--34616},
}
DiverseDiT: Towards Diverse Representation Learning in Diffusion Transformers: Mengping Yang,

Zhiyu Tan,

Binglei Li,

Xiaomeng Yang,

Hesen Chen,

Hao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Mengping and Tan, Zhiyu and Li, Binglei and Yang, Xiaomeng and Chen, Hesen and Li, Hao},
  title     = {{DiverseDiT}: Towards Diverse Representation Learning in Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40591--40601},
}
SCIEval: Evaluating and Benchmarking the Faithfulness of Scientific Image Generation and Interpretation with Large Multimodal Models: Guanghui Ye,

Huan Zhao,

Zhixue Zhao,

Tengfei Ma,

Kehan Wang,

Steffen Eger,

Zhihua Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Guanghui and Zhao, Huan and Zhao, Zhixue and Ma, Tengfei and Wang, Kehan and Eger, Steffen and Jiang, Zhihua},
  title     = {{SCIEval}: Evaluating and Benchmarking the Faithfulness of Scientific Image Generation and Interpretation with Large Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29760--29770},
}
LiDAS: Lighting-driven Dynamic Active Sensing for Nighttime Perception: Simon de Moreau,

Andrei Bursuc,

Hafid El Idrissi,

Fabien Moutarde; [pdf] [supp]
[bibtex]
@InProceedings{de_Moreau_2026_CVPR,
  author    = {de Moreau, Simon and Bursuc, Andrei and El Idrissi, Hafid and Moutarde, Fabien},
  title     = {{LiDAS}: Lighting-driven Dynamic Active Sensing for Nighttime Perception},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14188--14197},
}
RoboAgent: Chaining Basic Capabilities for Embodied Task Planning: Peiran Xu,

Jiaqi Zheng,

Yadong Mu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Peiran and Zheng, Jiaqi and Mu, Yadong},
  title     = {{RoboAgent}: Chaining Basic Capabilities for Embodied Task Planning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15276--15290},
}
Unsupervised Monocular 3D Keypoint Discovery from Multi-View Diffusion Priors: Subin Jeon,

In Cho,

Junyoung Hong,

Woong Oh Cho,

Seon Joo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeon_2026_CVPR,
  author    = {Jeon, Subin and Cho, In and Hong, Junyoung and Cho, Woong Oh and Kim, Seon Joo},
  title     = {Unsupervised Monocular {3D} Keypoint Discovery from Multi-View Diffusion Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17132--17142},
}
AVA-VLA: Improving Vision-Language-Action models with Active Visual Attention: Lei Xiao,

Jifeng Li,

Juntao Gao,

Feiyang Ye,

Yan Jin,

Jingjing Qian,

Jing Zhang,

Yong Wu,

Xiaoyuan Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Lei and Li, Jifeng and Gao, Juntao and Ye, Feiyang and Jin, Yan and Qian, Jingjing and Zhang, Jing and Wu, Yong and Yu, Xiaoyuan},
  title     = {{AVA-VLA}: Improving Vision-Language-Action models with Active Visual Attention},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13453--13463},
}
URScenes: A Multi-scenario Dataset for Unstructured Road Environments: Runsen Liu,

Aizemaitijiang Baoerhan,

Zhangyu Wang,

Jie Wang,

Jinghao Cui,

Guizhen Yu,

Songyue Yang,

WanCheng Sun,

Mingjun Tang,

Zhanbo Hua,

Wenwen Luo; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Runsen and Baoerhan, Aizemaitijiang and Wang, Zhangyu and Wang, Jie and Cui, Jinghao and Yu, Guizhen and Yang, Songyue and Sun, WanCheng and Tang, Mingjun and Hua, Zhanbo and Luo, Wenwen},
  title     = {{URScenes}: A Multi-scenario Dataset for Unstructured Road Environments},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17874--17883},
}
EgoControl: Controllable Egocentric Video Generation via 3D Full-Body Poses: Enrico Pallotta,

Sina Mokhtarzadeh Azar,

Lars Doorenbos,

Serdar Ozsoy,

Umar Iqbal,

Juergen Gall; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pallotta_2026_CVPR,
  author    = {Pallotta, Enrico and Azar, Sina Mokhtarzadeh and Doorenbos, Lars and Ozsoy, Serdar and Iqbal, Umar and Gall, Juergen},
  title     = {{EgoControl}: Controllable Egocentric Video Generation via {3D} Full-Body Poses},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4269--4279},
}
Act2See: Emergent Active Visual Perception for Video Reasoning: Martin Q. Ma,

Yuxiao Qu,

Aditya Agrawal,

Willis Guo,

Paul Pu Liang,

Ruslan Salakhutdinov,

Louis-Philippe Morency; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Martin Q. and Qu, Yuxiao and Agrawal, Aditya and Guo, Willis and Liang, Paul Pu and Salakhutdinov, Ruslan and Morency, Louis-Philippe},
  title     = {{Act2See}: Emergent Active Visual Perception for Video Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5455--5464},
}
SemLayer: Semantic-aware Generative Segmentation and Layer Construction for Abstract Icons: Haiyang Xu,

Ronghuan Wu,

Li-Yi Wei,

Nanxuan Zhao,

Chenxi Liu,

Cuong Nguyen,

Zhuowen Tu,

Zhaowen Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Haiyang and Wu, Ronghuan and Wei, Li-Yi and Zhao, Nanxuan and Liu, Chenxi and Nguyen, Cuong and Tu, Zhuowen and Wang, Zhaowen},
  title     = {{SemLayer}: Semantic-aware Generative Segmentation and Layer Construction for Abstract Icons},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42082--42092},
}
From Indoor to Open World: Revealing the Spatial Reasoning Gap in MLLMs: Mingrui Wu,

Zhaozhi Wang,

Fangjinhua Wang,

Jiaolong Yang,

Marc Pollefeys,

Tong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Mingrui and Wang, Zhaozhi and Wang, Fangjinhua and Yang, Jiaolong and Pollefeys, Marc and Zhang, Tong},
  title     = {From Indoor to Open World: Revealing the Spatial Reasoning Gap in {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16789--16799},
}
Noise-Aware Few-Shot Learning through Bi-directional Multi-View Prompt Alignment: Lu Niu,

Cheng Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Niu_2026_CVPR,
  author    = {Niu, Lu and Xue, Cheng},
  title     = {Noise-Aware Few-Shot Learning through Bi-directional Multi-View Prompt Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41646--41656},
}
RaGS: Unleashing 3D Gaussian Splatting from 4D Radar and Monocular Cue for 3D Object Detection: Xiaokai Bai,

Chenxu Zhou,

Lianqing Zheng,

Jianan Liu,

Si-Yuan Cao,

Xiaohan Zhang,

Yiming Li,

Zhengzhuang Zhang,

Hui-Liang Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Xiaokai and Zhou, Chenxu and Zheng, Lianqing and Liu, Jianan and Cao, Si-Yuan and Zhang, Xiaohan and Li, Yiming and Zhang, Zhengzhuang and Shen, Hui-Liang},
  title     = {{RaGS}: Unleashing {3D} {Gaussian} Splatting from {4D} Radar and Monocular Cue for {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4983--4992},
}
MARSS: Radar Semantic Segmentation via Modular Attention and State Space Models: Fengyu Chen,

Tiao Tan,

Teng Li,

Yuantian Quan,

Qingmin Liao; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Fengyu and Tan, Tiao and Li, Teng and Quan, Yuantian and Liao, Qingmin},
  title     = {{MARSS}: Radar Semantic Segmentation via Modular Attention and State Space Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17959--17968},
}
GH-NAF: Grid-Adaptive Hash-Level-Attended Neural Attenuation Fields for Discrepancy-Aware CBCT: Seong Je Oh,

Ju Hwan Lee,

Chae Yeon Lim,

Donghwan Lee,

Myung Jin Chung,

Kyungsu Kim; [pdf] [supp]
[bibtex]
@InProceedings{Oh_2026_CVPR,
  author    = {Oh, Seong Je and Lee, Ju Hwan and Lim, Chae Yeon and Lee, Donghwan and Chung, Myung Jin and Kim, Kyungsu},
  title     = {{GH-NAF}: Grid-Adaptive Hash-Level-Attended Neural Attenuation Fields for Discrepancy-Aware {CBCT}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41762--41772},
}
Towards Generalizable AI-Generated Image Detection via Image-Adaptive Prompt Learning: Yiheng Li,

Zichang Tan,

Guoqing Xu,

Zhen Lei,

Xu Zhou,

Yang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yiheng and Tan, Zichang and Xu, Guoqing and Lei, Zhen and Zhou, Xu and Yang, Yang},
  title     = {Towards Generalizable {AI}-Generated Image Detection via Image-Adaptive Prompt Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21262--21272},
}
WAM-Flow: Parallel Coarse-to-Fine Motion Planning via Discrete Flow Matching for Autonomous Driving: Yifang Xu,

Jiahao Cui,

Zhihao Zhu,

Hanlin Shang,

Shan Luan,

Mingwang Xu,

Feipeng Cai,

Neng Zhang,

Yaoyi Li,

Jia Cai,

Siyu Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yifang and Cui, Jiahao and Zhu, Zhihao and Shang, Hanlin and Luan, Shan and Xu, Mingwang and Cai, Feipeng and Zhang, Neng and Li, Yaoyi and Cai, Jia and Zhu, Siyu},
  title     = {{WAM-Flow}: Parallel Coarse-to-Fine Motion Planning via Discrete Flow Matching for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24918--24928},
}
Parameter-efficient Continual Learning for Enhancing Plasticity without Forgetting under Limited Model Capacity: Yitian Chen,

Shigeng Zhang,

Xuan Liu,

Mingming Lu,

Kai Chen,

Hongye Zhu,

Xinning Chen; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yitian and Zhang, Shigeng and Liu, Xuan and Lu, Mingming and Chen, Kai and Zhu, Hongye and Chen, Xinning},
  title     = {Parameter-efficient Continual Learning for Enhancing Plasticity without Forgetting under Limited Model Capacity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10789--10798},
}
ActionMesh: Animated 3D Mesh Generation with Temporal 3D Diffusion: Remy Sabathier,

David Novotny,

Niloy J. Mitra,

Tom Monnier; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sabathier_2026_CVPR,
  author    = {Sabathier, Remy and Novotny, David and Mitra, Niloy J. and Monnier, Tom},
  title     = {{ActionMesh}: Animated {3D} Mesh Generation with Temporal {3D} Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34312--34321},
}
Towards Intrinsic-Aware Monocular 3D Object Detection: Zhihao Zhang,

Abhinav Kumar,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zhihao and Kumar, Abhinav and Liu, Xiaoming},
  title     = {Towards Intrinsic-Aware Monocular {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40739--40750},
}
Universal-to-Specific: Dynamic Knowledge-Guided Multiple Instance Learning for Few-Shot Whole Slide Image Classification: Junjian Li,

Hulin Kuang,

Jin Liu,

Hailin Yue,

Mengshen He,

Jianxin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Junjian and Kuang, Hulin and Liu, Jin and Yue, Hailin and He, Mengshen and Wang, Jianxin},
  title     = {Universal-to-Specific: Dynamic Knowledge-Guided Multiple Instance Learning for Few-Shot Whole Slide Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26614--26623},
}
From Few-way to Many-way: Rethinking Few-shot Fine-grained Image Classification: Li-Jun Zhao,

Zhen-Duo Chen,

Xin Luo,

Xin-Shun Xu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Li-Jun and Chen, Zhen-Duo and Luo, Xin and Xu, Xin-Shun},
  title     = {From Few-way to Many-way: Rethinking Few-shot Fine-grained Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12364--12373},
}
Boosting Vision-Language-Action Finetuning with Feasible Action Neighborhood Prior: Haochen Niu,

Kanyu Zhang,

Shuyu Yin,

Qinghai Guo,

Peilin Liu,

Fei Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Niu_2026_CVPR,
  author    = {Niu, Haochen and Zhang, Kanyu and Yin, Shuyu and Guo, Qinghai and Liu, Peilin and Wen, Fei},
  title     = {Boosting Vision-Language-Action Finetuning with Feasible Action Neighborhood Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27956--27966},
}
High-Precision Dichotomous Image Segmentation via Depth Integrity-Prior and Fine-Grained Patch Strategy: Xianjie Liu,

Keren Fu,

Qijun Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xianjie and Fu, Keren and Zhao, Qijun},
  title     = {High-Precision Dichotomous Image Segmentation via Depth Integrity-Prior and Fine-Grained Patch Strategy},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6357--6366},
}
Multimodal RewardBench 2: Evaluating Omni Reward Models for Interleaved Text and Image: Yushi Hu,

Reyhane Askari-Hemmat,

Melissa Hall,

Emily Dinan,

Luke Zettlemoyer,

Marjan Ghazvininejad; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Yushi and Askari-Hemmat, Reyhane and Hall, Melissa and Dinan, Emily and Zettlemoyer, Luke and Ghazvininejad, Marjan},
  title     = {Multimodal {RewardBench} 2: Evaluating Omni Reward Models for Interleaved Text and Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36904--36915},
}
Ani3DHuman: Photorealistic 3D Human Animation with Self-guided Stochastic Sampling: Qi Sun,

Can Wang,

Jiaxiang Shang,

Yingchun Liu,

Jing Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Qi and Wang, Can and Shang, Jiaxiang and Liu, Yingchun and Liao, Jing},
  title     = {{Ani3DHuman}: Photorealistic {3D} Human Animation with Self-guided Stochastic Sampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12651--12662},
}
VideoITG: Multimodal Video Understanding with Instructed Temporal Grounding: Shihao Wang,

Guo Chen,

De-An Huang,

Zhiqi Li,

Minghan Li,

Guilin Liu,

Jan Kautz,

Jose M. Alvarez,

Lei Zhang,

Zhiding Yu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shihao and Chen, Guo and Huang, De-An and Li, Zhiqi and Li, Minghan and Liu, Guilin and Kautz, Jan and Alvarez, Jose M. and Zhang, Lei and Yu, Zhiding},
  title     = {{VideoITG}: Multimodal Video Understanding with Instructed Temporal Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24640--24650},
}
KAMP: Knowledge-Anchored Multimodal Pretraining Framework for Medical Image Representation: Feiyu Huang,

Jia Li,

Zhao Chen,

Yang Wu,

Caleb Chen Cao,

Lei Chen; [pdf]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Feiyu and Li, Jia and Chen, Zhao and Wu, Yang and Cao, Caleb Chen and Chen, Lei},
  title     = {{KAMP}: Knowledge-Anchored Multimodal Pretraining Framework for Medical Image Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21067--21077},
}
Accelerating Diffusion-based Video Editing via Heterogeneous Caching: Beyond Full Computing at Sampled Denoising Timestep: Tianyi Liu,

Ye Lu,

Linfeng Zhang,

Chen Cai,

Jianjun Gao,

Yi Wang,

Kim-Hui Yap,

Lap-Pui Chau; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Tianyi and Lu, Ye and Zhang, Linfeng and Cai, Chen and Gao, Jianjun and Wang, Yi and Yap, Kim-Hui and Chau, Lap-Pui},
  title     = {Accelerating Diffusion-based Video Editing via Heterogeneous Caching: Beyond Full Computing at Sampled Denoising Timestep},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35800--35809},
}
Saliency-R1: Enforcing Interpretable and Faithful Vision-language Reasoning via Saliency-map Alignment Reward: Shizhan Gong,

Minda Hu,

Qiyuan Zhang,

Chen Ma,

Qi Dou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2026_CVPR,
  author    = {Gong, Shizhan and Hu, Minda and Zhang, Qiyuan and Ma, Chen and Dou, Qi},
  title     = {{Saliency-R1}: Enforcing Interpretable and Faithful Vision-language Reasoning via Saliency-map Alignment Reward},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24095--24106},
}
Plug-and-Play PDE Optimization for 3D Gaussian Splatting: Toward High-Quality Rendering and Reconstruction: Yifan Mo,

Youcheng Cai,

Ligang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mo_2026_CVPR,
  author    = {Mo, Yifan and Cai, Youcheng and Liu, Ligang},
  title     = {Plug-and-Play {PDE} Optimization for {3D} {Gaussian} Splatting: Toward High-Quality Rendering and Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33333--33342},
}
A Debiased Reconstruction-based Framework for Training-Free Detection of AI-Generated Images: Sungik Choi,

Hankook Lee,

Jaehoon Lee,

Robin Kim,

Stanley Jungkyu Choi,

Moontae Lee; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Sungik and Lee, Hankook and Lee, Jaehoon and Kim, Robin and Choi, Stanley Jungkyu and Lee, Moontae},
  title     = {A Debiased Reconstruction-based Framework for Training-Free Detection of {AI}-Generated Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3254--3263},
}
UniMERNet: A Universal Network for Real-World Mathematical Expression Recognition: Zhuangcheng Gu,

Guang Liang,

Bin Wang,

Zhiyuan Zhao,

Qintong Zhang,

Weijia Li,

Chao Xu,

Bo Zhang,

Botian Shi,

Jiang Wu,

Wentao Zhang,

Conghui He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Zhuangcheng and Liang, Guang and Wang, Bin and Zhao, Zhiyuan and Zhang, Qintong and Li, Weijia and Xu, Chao and Zhang, Bo and Shi, Botian and Wu, Jiang and Zhang, Wentao and He, Conghui},
  title     = {{UniMERNet}: A Universal Network for Real-World Mathematical Expression Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34106--34115},
}
Open-Vocabulary Domain Generalization in Urban-Scene Segmentation: Dong Zhao,

Qi Zang,

Nan Pu,

Wenjing Li,

Nicu Sebe,

Zhun Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Dong and Zang, Qi and Pu, Nan and Li, Wenjing and Sebe, Nicu and Zhong, Zhun},
  title     = {Open-Vocabulary Domain Generalization in Urban-Scene Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20388--20398},
}
Fourier Angle Alignment for Oriented Object Detection in Remote Sensing: Changyu Gu,

Linwei Chen,

Lin Gu,

Ying Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Changyu and Chen, Linwei and Gu, Lin and Fu, Ying},
  title     = {Fourier Angle Alignment for Oriented Object Detection in Remote Sensing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42225--42235},
}
SimScale: Learning to Drive via Real-World Simulation at Scale: Haochen Tian,

Tianyu Li,

Haochen Liu,

Jiazhi Yang,

Yihang Qiu,

Guang Li,

Junli Wang,

Yinfeng Gao,

Zhang Zhang,

Liang Wang,

Hangjun Ye,

Long Chen,

Hongyang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2026_CVPR,
  author    = {Tian, Haochen and Li, Tianyu and Liu, Haochen and Yang, Jiazhi and Qiu, Yihang and Li, Guang and Wang, Junli and Gao, Yinfeng and Zhang, Zhang and Wang, Liang and Ye, Hangjun and Chen, Long and Li, Hongyang},
  title     = {{SimScale}: Learning to Drive via Real-World Simulation at Scale},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36365--36374},
}
StyleTextGen: Style-Conditioned Multilingual Scene Text Generation: Zeyu Chen,

Fangmin Zhao,

Yan Shu,

Yichao Liu,

Liu Yu,

Yu Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zeyu and Zhao, Fangmin and Shu, Yan and Liu, Yichao and Yu, Liu and Zhou, Yu},
  title     = {{StyleTextGen}: Style-Conditioned Multilingual Scene Text Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7643--7653},
}
Single-step Diffusion-based Video Coding with Semantic-Temporal Guidance: Naifu Xue,

Zhaoyang Jia,

Jiahao Li,

Bin Li,

Zihan Zheng,

Yuan Zhang,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR,
  author    = {Xue, Naifu and Jia, Zhaoyang and Li, Jiahao and Li, Bin and Zheng, Zihan and Zhang, Yuan and Lu, Yan},
  title     = {Single-step Diffusion-based Video Coding with Semantic-Temporal Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9752--9761},
}
Unified Customized Generation by Disentangled Reward Modeling: Shaojin Wu,

Mengqi Huang,

Yufeng Cheng,

Wenxu Wu,

Jiahe Tian,

Yiming Luo,

Fei Ding,

Qian He; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Shaojin and Huang, Mengqi and Cheng, Yufeng and Wu, Wenxu and Tian, Jiahe and Luo, Yiming and Ding, Fei and He, Qian},
  title     = {Unified Customized Generation by Disentangled Reward Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34418--34427},
}
AeroGS: Scale-Aware Gaussian Splatting for Pose-Free Dynamic UAV Scene Reconstruction: Tingyun Li,

Xinyi Liu,

Yongjun Zhang,

Yi Wan,

Xiaoan Liu,

Weiwei Fan,

Jiahao Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Tingyun and Liu, Xinyi and Zhang, Yongjun and Wan, Yi and Liu, Xiaoan and Fan, Weiwei and Liu, Jiahao},
  title     = {{AeroGS}: Scale-Aware {Gaussian} Splatting for Pose-Free Dynamic {UAV} Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40907--40917},
}
Neural Collapse in Test-Time Adaptation: Xiao Chen,

Zhongjing Du,

Jiazhen Huang,

Xu Jiang,

Li Lu,

Jingyan Jiang,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xiao and Du, Zhongjing and Huang, Jiazhen and Jiang, Xu and Lu, Li and Jiang, Jingyan and Wang, Zhi},
  title     = {Neural Collapse in Test-Time Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10567--10576},
}
Learning to Control Physically-simulated 3D Characters via Generating and Mimicking 2D Motions: Jianan Li,

Xiao Chen,

Tao Huang,

Tien-Tsin Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jianan and Chen, Xiao and Huang, Tao and Wong, Tien-Tsin},
  title     = {Learning to Control Physically-simulated {3D} Characters via Generating and Mimicking {2D} Motions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38302--38312},
}
DuoMo: Dual Motion Diffusion for World-Space Human Reconstruction: Yufu Wang,

Evonne Ng,

Soyong Shin,

Rawal Khirodkar,

Yuan Dong,

Zhaoen Su,

Jinhyung Park,

Kris Kitani,

Alexander Richard,

Fabian Prada,

Michael Zollhöfer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yufu and Ng, Evonne and Shin, Soyong and Khirodkar, Rawal and Dong, Yuan and Su, Zhaoen and Park, Jinhyung and Kitani, Kris and Richard, Alexander and Prada, Fabian and Zollh{\"o}fer, Michael},
  title     = {{DuoMo}: Dual Motion Diffusion for World-Space Human Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42777--42788},
}
Towards Decompositional Human Motion Generation with Energy-Based Diffusion Models: Jianrong Zhang,

Hehe Fan,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jianrong and Fan, Hehe and Yang, Yi},
  title     = {Towards Decompositional Human Motion Generation with Energy-Based Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30650--30660},
}
HiFi-BRep: High-Fidelity Latent Representation for Robust B-Rep Generation: Junhao Hou,

Chenqi Luo,

Pufan Wang,

Jiaying Lu,

Yusheng Liu,

Feiwei Qin,

Meie Fang,

Kun Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Hou_2026_CVPR,
  author    = {Hou, Junhao and Luo, Chenqi and Wang, Pufan and Lu, Jiaying and Liu, Yusheng and Qin, Feiwei and Fang, Meie and Zhou, Kun},
  title     = {{HiFi-BRep}: High-Fidelity Latent Representation for Robust {B-Rep} Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27199--27208},
}
ManifoldGD: Training-Free Hierarchical Manifold Guidance for Diffusion-Based Dataset Distillation: Ayush Roy,

Wei-Yang Alex Lee,

Rudrasis Chakraborty,

Vishnu Suresh Lokhande; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Roy_2026_CVPR,
  author    = {Roy, Ayush and Lee, Wei-Yang Alex and Chakraborty, Rudrasis and Lokhande, Vishnu Suresh},
  title     = {{ManifoldGD}: Training-Free Hierarchical Manifold Guidance for Diffusion-Based Dataset Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12406--12416},
}
Flash-DMD: Towards High-Fidelity Few-Step Image Generation with Efficient Distillation and Joint Reinforcement Learning: Guanjie Chen,

Shirui Huang,

Yifu Sun,

Kai Liu,

Jianchen Zhu,

Xiaoye Qu,

Yu Cheng,

Peng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Guanjie and Huang, Shirui and Sun, Yifu and Liu, Kai and Zhu, Jianchen and Qu, Xiaoye and Cheng, Yu and Chen, Peng}, title = {Flash-DMD: Towards High-Fidelity Few-Step Image Generation with Efficient Distillation and Joint Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6010-6020} }
TAP: A Token-Adaptive Predictor Framework for Training-Free Diffusion Acceleration: Haowei Zhu,

Tingxuan Huang,

Xing Wang,

Tianyu Zhao,

Jiexi Wang,

Weifeng Chen,

Xurui Peng,

Fangmin Chen,

Junhai Yong,

Bin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR, author = {Zhu, Haowei and Huang, Tingxuan and Wang, Xing and Zhao, Tianyu and Wang, Jiexi and Chen, Weifeng and Peng, Xurui and Chen, Fangmin and Yong, Junhai and Wang, Bin}, title = {TAP: A Token-Adaptive Predictor Framework for Training-Free Diffusion Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36082-36091} }
Debiased Sample Selection for Learning with Noisy Labels: Weiran Pan,

Wei Wei,

Wenfeng Xie; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2026_CVPR, author = {Pan, Weiran and Wei, Wei and Xie, Wenfeng}, title = {Debiased Sample Selection for Learning with Noisy Labels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32047-32057} }
SpiralDiff: Spiral Diffusion with LoRA for RGB-to-RAW Conversion Across Cameras: Huanjing Yue,

Shangbin Xie,

Cong Cao,

Qian Wu,

Lei Zhang,

Lei Zhao,

Jingyu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yue_2026_CVPR, author = {Yue, Huanjing and Xie, Shangbin and Cao, Cong and Wu, Qian and Zhang, Lei and Zhao, Lei and Yang, Jingyu}, title = {SpiralDiff: Spiral Diffusion with LoRA for RGB-to-RAW Conversion Across Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38453-38463} }
Inter-Photon-Limited Videography: Andrew Xie,

Dongyu Du,

Sotiris Nousias,

David B. Lindell,

Kiriakos N. Kutulakos; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR, author = {Xie, Andrew and Du, Dongyu and Nousias, Sotiris and Lindell, David B. and Kutulakos, Kiriakos N.}, title = {Inter-Photon-Limited Videography}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34006-34015} }
MambaSIC: Mamba-based Stereo Image Compression with Bi-directional Multi-reference Entropy Model: Shiyu Qin,

Xinjie Zhang,

Zhening Liu,

Jinpeng Wang,

Bin Chen,

Jiawei Li,

Yifan Ren,

Shu-Tao Xia,

Jun Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Qin_2026_CVPR, author = {Qin, Shiyu and Zhang, Xinjie and Liu, Zhening and Wang, Jinpeng and Chen, Bin and Li, Jiawei and Ren, Yifan and Xia, Shu-Tao and Zhang, Jun}, title = {MambaSIC: Mamba-based Stereo Image Compression with Bi-directional Multi-reference Entropy Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5306-5315} }
GeodesicNVS: Probability Density Geodesic Flow Matching for Novel View Synthesis: Xuqin Wang,

Tao Wu,

Yanfeng Zhang,

Lu Liu,

Mingwei Sun,

Yongliang Wang,

Niclas Zeller,

Daniel Cremers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Xuqin and Wu, Tao and Zhang, Yanfeng and Liu, Lu and Sun, Mingwei and Wang, Yongliang and Zeller, Niclas and Cremers, Daniel}, title = {GeodesicNVS: Probability Density Geodesic Flow Matching for Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40316-40326} }
Text-Printed Image: Bridging the Image-Text Modality Gap for Text-centric Training of Large Vision-Language Models: Shojiro Yamabe,

Futa Waseda,

Daiki Shiono,

Tsubasa Takahashi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yamabe_2026_CVPR, author = {Yamabe, Shojiro and Waseda, Futa and Shiono, Daiki and Takahashi, Tsubasa}, title = {Text-Printed Image: Bridging the Image-Text Modality Gap for Text-centric Training of Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17270-17281} }
HBridge: H-Shape Bridging of Heterogeneous Experts for Unified Multimodal Understanding and Generation: Xiang Wang,

Zhifei Zhang,

He Zhang,

Zhe Lin,

Yuqian Zhou,

Qing Liu,

Shiwei Zhang,

Yijun Li,

Shaoteng Liu,

Haitian Zheng,

Jason Kuen,

Yuehuan Wang,

Changxin Gao,

Nong Sang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Xiang and Zhang, Zhifei and Zhang, He and Lin, Zhe and Zhou, Yuqian and Liu, Qing and Zhang, Shiwei and Li, Yijun and Liu, Shaoteng and Zheng, Haitian and Kuen, Jason and Wang, Yuehuan and Gao, Changxin and Sang, Nong}, title = {HBridge: H-Shape Bridging of Heterogeneous Experts for Unified Multimodal Understanding and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14767-14778} }
InsCal: Calibrated Multi-Source Fully Test-Time Prompt Tuning for Object Detection: Xiaofan Que,

Dingrong Wang,

Xumin Liu,

Qi Yu; [pdf] [supp]
[bibtex]
@InProceedings{Que_2026_CVPR, author = {Que, Xiaofan and Wang, Dingrong and Liu, Xumin and Yu, Qi}, title = {InsCal: Calibrated Multi-Source Fully Test-Time Prompt Tuning for Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36936-36946} }
ActivePolicy: Active Gaussian Reconstruction and Optimization Strategy Based on Global-Local Information Gain: Yingzhao Li,

Yanjie Liu,

Lijun Zhao; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Yingzhao and Liu, Yanjie and Zhao, Lijun}, title = {ActivePolicy: Active Gaussian Reconstruction and Optimization Strategy Based on Global-Local Information Gain}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5003-5013} }
Unified Number-Free Text-to-Motion Generation Via Flow Matching: Guanhe Huang,

Oya Celiktutan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR, author = {Huang, Guanhe and Celiktutan, Oya}, title = {Unified Number-Free Text-to-Motion Generation Via Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23570-23580} }
Thinking Diffusion: Penalize and Guide Visual-Grounded Reasoning in Diffusion Multimodal Language Models: Keuntae Kim,

Mingyu Kang,

Yong Suk Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Keuntae and Kang, Mingyu and Choi, Yong Suk}, title = {Thinking Diffusion: Penalize and Guide Visual-Grounded Reasoning in Diffusion Multimodal Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5154-5164} }
Mixture of States: Routing Token-Level Dynamics for Multimodal Generation: Haozhe Liu,

Ding Liu,

Mingchen Zhuge,

Zijian Zhou,

Tian Xie,

Sen He,

Yukang Yang,

Shuming Liu,

Yuren Cong,

Jiadong Guo,

Hongyu Xu,

Ke Xu,

Kam-Woh Ng,

Juan C. Perez,

Juan-Manuel Perez-Rua,

Tao Xiang,

Wei Liu,

Shikun Liu,

Jürgen Schmidhuber; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Haozhe and Liu, Ding and Zhuge, Mingchen and Zhou, Zijian and Xie, Tian and He, Sen and Yang, Yukang and Liu, Shuming and Cong, Yuren and Guo, Jiadong and Xu, Hongyu and Xu, Ke and Ng, Kam-Woh and Perez, Juan C. and Perez-Rua, Juan-Manuel and Xiang, Tao and Liu, Wei and Liu, Shikun and Schmidhuber, J{\"u}rgen}, title = {Mixture of States: Routing Token-Level Dynamics for Multimodal Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36781-36792} }
Bidirectional Multimodal Prompt Learning with Scale-Aware Training for Few-Shot Multi-Class Anomaly Detection: Yujin Lee,

Sewon Kim,

Daeun Moon,

Seoyoon Jang,

Hyunsoo Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR, author = {Lee, Yujin and Kim, Sewon and Moon, Daeun and Jang, Seoyoon and Yoon, Hyunsoo}, title = {Bidirectional Multimodal Prompt Learning with Scale-Aware Training for Few-Shot Multi-Class Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35577-35586} }
Visual-Aware CoT: Achieving High-Fidelity Visual Consistency in Unified Models: Zixuan Ye,

Quande Liu,

Cong Wei,

Yuanxing Zhang,

Xintao Wang,

Pengfei Wan,

Kun Gai,

Wenhan Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR, author = {Ye, Zixuan and Liu, Quande and Wei, Cong and Zhang, Yuanxing and Wang, Xintao and Wan, Pengfei and Gai, Kun and Luo, Wenhan}, title = {Visual-Aware CoT: Achieving High-Fidelity Visual Consistency in Unified Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9116-9126} }
LaDy: Lagrangian-Dynamic Informed Network for Skeleton-based Action Segmentation via Spatial-Temporal Modulation: Haoyu Ji,

Xueting Liu,

Yu Gao,

Wenze Huang,

Zhihao Yang,

Weihong Ren,

Zhiyong Wang,

Honghai Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2026_CVPR, author = {Ji, Haoyu and Liu, Xueting and Gao, Yu and Huang, Wenze and Yang, Zhihao and Ren, Weihong and Wang, Zhiyong and Liu, Honghai}, title = {LaDy: Lagrangian-Dynamic Informed Network for Skeleton-based Action Segmentation via Spatial-Temporal Modulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34449-34459} }
GenTract: Generative Global Tractography: Alec Sargood,

Lemuel Puglisi,

Elinor Thompson,

Mirco Musolesi,

Daniel C. Alexander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sargood_2026_CVPR, author = {Sargood, Alec and Puglisi, Lemuel and Thompson, Elinor and Musolesi, Mirco and Alexander, Daniel C.}, title = {GenTract: Generative Global Tractography}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35291-35300} }
VQ-VA World: Towards High-Quality Visual Question-Visual Answering: Chenhui Gou,

Zilong Chen,

Zeyu Wang,

Feng Li,

Deyao Zhu,

Zicheng Duan,

Kunchang Li,

Chaorui Deng,

Hongyi Yuan,

Haoqi Fan,

Cihang Xie,

Jianfei Cai,

Hamid Rezatofighi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gou_2026_CVPR, author = {Gou, Chenhui and Chen, Zilong and Wang, Zeyu and Li, Feng and Zhu, Deyao and Duan, Zicheng and Li, Kunchang and Deng, Chaorui and Yuan, Hongyi and Fan, Haoqi and Xie, Cihang and Cai, Jianfei and Rezatofighi, Hamid}, title = {VQ-VA World: Towards High-Quality Visual Question-Visual Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18837-18847} }
From Contrast to Consistency: Rethinking Event-based Continuous-Time Optical Flow Estimation: Rui Hu,

Song Wu,

Wen Yang,

Jinjian Wu; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR, author = {Hu, Rui and Wu, Song and Yang, Wen and Wu, Jinjian}, title = {From Contrast to Consistency: Rethinking Event-based Continuous-Time Optical Flow Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15125-15134} }
ReCALL: Recalibrating Capability Degradation for MLLM-based Composed Image Retrieval: Tianyu Yang,

ChenWei He,

Xiangzhao Hao,

Tianyue Wang,

Jiarui Guo,

Haiyun Guo,

Leigang Qu,

Jinqiao Wang,

Tat-Seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Tianyu and He, ChenWei and Hao, Xiangzhao and Wang, Tianyue and Guo, Jiarui and Guo, Haiyun and Qu, Leigang and Wang, Jinqiao and Chua, Tat-Seng}, title = {ReCALL: Recalibrating Capability Degradation for MLLM-based Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38763-38773} }
Adaptive Learned Image Compression with Graph Neural Networks: Yunuo Chen,

Bing He,

Zezheng Lyu,

Hongwei Hu,

Qunshan Gu,

Yuan Tian,

Guo Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Yunuo and He, Bing and Lyu, Zezheng and Hu, Hongwei and Gu, Qunshan and Tian, Yuan and Lu, Guo}, title = {Adaptive Learned Image Compression with Graph Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12150-12161} }
Wan-Weaver: Interleaved Multi-modal Generation via Decoupled Training: Jinbo Xing,

Zeyinzi Jiang,

Yuxiang Tuo,

Chaojie Mao,

Xiaotang Gai,

Xi Chen,

Jingfeng Zhang,

Yulin Pan,

Zhen Han,

Jie Xiao,

Keyu Yan,

Chenwei Xie,

Chongyang Zhong,

Kai Zhu,

Tong Shen,

Lianghua Huang,

Yu Liu,

Yujiu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2026_CVPR, author = {Xing, Jinbo and Jiang, Zeyinzi and Tuo, Yuxiang and Mao, Chaojie and Gai, Xiaotang and Chen, Xi and Zhang, Jingfeng and Pan, Yulin and Han, Zhen and Xiao, Jie and Yan, Keyu and Xie, Chenwei and Zhong, Chongyang and Zhu, Kai and Shen, Tong and Huang, Lianghua and Liu, Yu and Yang, Yujiu}, title = {Wan-Weaver: Interleaved Multi-modal Generation via Decoupled Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36267-36278} }
TINA: Text-Free Inversion Attack for Unlearned Text-to-Image Diffusion Models: Qianlong Xiang,

Miao Zhang,

Haoyu Zhang,

Kun Wang,

Junhui Hou,

Liqiang Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2026_CVPR, author = {Xiang, Qianlong and Zhang, Miao and Zhang, Haoyu and Wang, Kun and Hou, Junhui and Nie, Liqiang}, title = {TINA: Text-Free Inversion Attack for Unlearned Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30076-30086} }
ChordEdit: One-Step Low-Energy Transport for Image Editing: Liangsi Lu,

Xuhang Chen,

Minzhe Guo,

Shichu Li,

Jingchao Wang,

Yang Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR, author = {Lu, Liangsi and Chen, Xuhang and Guo, Minzhe and Li, Shichu and Wang, Jingchao and Shi, Yang}, title = {ChordEdit: One-Step Low-Energy Transport for Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14398-14407} }
MagicFuse: Single Image Fusion for Visual and Semantic Reinforcement: Hao Zhang,

Yanping Zha,

Zizhuo Li,

Meiqi Gong,

Jiayi Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Hao and Zha, Yanping and Li, Zizhuo and Gong, Meiqi and Ma, Jiayi}, title = {MagicFuse: Single Image Fusion for Visual and Semantic Reinforcement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26551-26560} }
MUSE: Harnessing Precise and Diverse Semantics for Few-Shot Whole Slide Image Classification: Jiahao Xu,

Sheng Huang,

Xin Zhang,

Zhixiong Nan,

Jiajun Dong,

Nankun Mu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Jiahao and Huang, Sheng and Zhang, Xin and Nan, Zhixiong and Dong, Jiajun and Mu, Nankun}, title = {MUSE: Harnessing Precise and Diverse Semantics for Few-Shot Whole Slide Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33911-33921} }
RobotSeg: A Model and Dataset for Segmenting Robots in Image and Video: Haiyang Mei,

Qiming Huang,

Hai Ci,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mei_2026_CVPR, author = {Mei, Haiyang and Huang, Qiming and Ci, Hai and Shou, Mike Zheng}, title = {RobotSeg: A Model and Dataset for Segmenting Robots in Image and Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14346-14356} }
Self-supervised Dynamic Heterogeneous Degradation Modeling for Unified Zero-Shot Image Restoration: XiaoWan Hu,

Jing Yang,

HeNan Liu,

HuaQiu Li,

Mai Xu; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR, author = {Hu, XiaoWan and Yang, Jing and Liu, HeNan and Li, HuaQiu and Xu, Mai}, title = {Self-supervised Dynamic Heterogeneous Degradation Modeling for Unified Zero-Shot Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22658-22668} }
Tutor-Student Reinforcement Learning: A Dynamic Curriculum for Robust Deepfake Detection: Zhanhe Lei,

Zhongyuan Wang,

Jikang Cheng,

Baojin Huang,

Yuhong Yang,

Zhen Han,

Chao Liang,

Dengpan Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2026_CVPR, author = {Lei, Zhanhe and Wang, Zhongyuan and Cheng, Jikang and Huang, Baojin and Yang, Yuhong and Han, Zhen and Liang, Chao and Ye, Dengpan}, title = {Tutor-Student Reinforcement Learning: A Dynamic Curriculum for Robust Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41436-41445} }
WalkGPT: Grounded Vision-Language Conversation with Depth-Aware Segmentation for Pedestrian Navigation: Rafi Ibn Sultan,

Hui Zhu,

Xiangyu Zhou,

Chengyin Li,

Prashant Khanduri,

Marco Brocanelli,

Dongxiao Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ibn_Sultan_2026_CVPR, author = {Ibn Sultan, Rafi and Zhu, Hui and Zhou, Xiangyu and Li, Chengyin and Khanduri, Prashant and Brocanelli, Marco and Zhu, Dongxiao}, title = {WalkGPT: Grounded Vision-Language Conversation with Depth-Aware Segmentation for Pedestrian Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40085-40095} }
Learning to Learn Weight Generation via Local Consistency Diffusion: Yunchuan Guan,

Yu Liu,

Ke Zhou,

Zhiqi Shen,

Jenq-Neng Hwang,

Lei Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guan_2026_CVPR, author = {Guan, Yunchuan and Liu, Yu and Zhou, Ke and Shen, Zhiqi and Hwang, Jenq-Neng and Li, Lei}, title = {Learning to Learn Weight Generation via Local Consistency Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19623-19633} }
ProOOD: Prototype-Guided Out-of-Distribution 3D Occupancy Prediction: Yuheng Zhang,

Mengfei Duan,

Kunyu Peng,

Yuhang Wang,

Di Wen,

Danda Pani Paudel,

Luc Van Gool,

Kailun Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yuheng and Duan, Mengfei and Peng, Kunyu and Wang, Yuhang and Wen, Di and Paudel, Danda Pani and Van Gool, Luc and Yang, Kailun}, title = {ProOOD: Prototype-Guided Out-of-Distribution 3D Occupancy Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14241-14252} }
ZeroIDIR: Zero-Reference Illumination Degradation Image Restoration with Perturbed Consistency Diffusion Models: Hai Jiang,

Zhen Liu,

Yinjie Lei,

Songchen Han,

Bing Zeng,

Shuaicheng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR, author = {Jiang, Hai and Liu, Zhen and Lei, Yinjie and Han, Songchen and Zeng, Bing and Liu, Shuaicheng}, title = {ZeroIDIR: Zero-Reference Illumination Degradation Image Restoration with Perturbed Consistency Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1320-1330} }
HiFi-Inpaint: Towards High-Fidelity Reference-Based Inpainting for Generating Detail-Preserving Human-Product Images: Yichen Liu,

Donghao Zhou,

Jie Wang,

Xin Gao,

Guisheng Liu,

Jiatong Li,

Quanwei Zhang,

Qiang Lyu,

Lanqing Guo,

Shilei Wen,

Weiqiang Wang,

Pheng-Ann Heng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Yichen and Zhou, Donghao and Wang, Jie and Gao, Xin and Liu, Guisheng and Li, Jiatong and Zhang, Quanwei and Lyu, Qiang and Guo, Lanqing and Wen, Shilei and Wang, Weiqiang and Heng, Pheng-Ann}, title = {HiFi-Inpaint: Towards High-Fidelity Reference-Based Inpainting for Generating Detail-Preserving Human-Product Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1994-2004} }
3D-LATTE: Latent Space 3D Editing from Textual Instructions: Maria Parelli,

Michael Oechsle,

Michael Niemeyer,

Federico Tombari,

Andreas Geiger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parelli_2026_CVPR, author = {Parelli, Maria and Oechsle, Michael and Niemeyer, Michael and Tombari, Federico and Geiger, Andreas}, title = {3D-LATTE: Latent Space 3D Editing from Textual Instructions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14377-14386} }
NI-Tex: Non-isometric Image-based Garment Texture Generation: Hui Shan,

Ming Li,

Haitao Yang,

Kai Zheng,

Sizhe Zheng,

Yanwei Fu,

Xiangru Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shan_2026_CVPR, author = {Shan, Hui and Li, Ming and Yang, Haitao and Zheng, Kai and Zheng, Sizhe and Fu, Yanwei and Huang, Xiangru}, title = {NI-Tex: Non-isometric Image-based Garment Texture Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19884-19893} }
Active Intelligence in Video Avatars via Closed-loop World Modeling: Xuanhua He,

Tianyu Yang,

Ke Cao,

Ruiqi Wu,

Cheng Meng,

Yong Zhang,

Zhuoliang Kang,

Xiaoming Wei,

Qifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR, author = {He, Xuanhua and Yang, Tianyu and Cao, Ke and Wu, Ruiqi and Meng, Cheng and Zhang, Yong and Kang, Zhuoliang and Wei, Xiaoming and Chen, Qifeng}, title = {Active Intelligence in Video Avatars via Closed-loop World Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27239-27248} }
DLVP-CLIP: Enhancing Fine-Grained Zero-Shot Anomaly Detection via Dynamic Local Visual Prompting: Gaowei Zhang,

Lihe Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Gaowei and Zhang, Lihe}, title = {DLVP-CLIP: Enhancing Fine-Grained Zero-Shot Anomaly Detection via Dynamic Local Visual Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35524-35533} }
Minimal Constraint Relaxation for Multiview Autocalibration: Norio Kosaka,

Timothy Duff,

Tomas Pajdla; [pdf] [supp]
[bibtex]
@InProceedings{Kosaka_2026_CVPR, author = {Kosaka, Norio and Duff, Timothy and Pajdla, Tomas}, title = {Minimal Constraint Relaxation for Multiview Autocalibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28937-28946} }
SODA: Sensitivity-Oriented Dynamic Acceleration for Diffusion Transformer: Tong Shao,

Yusen Fu,

Guoying Sun,

Jingde Kong,

Zhuotao Tian,

Jingyong Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2026_CVPR, author = {Shao, Tong and Fu, Yusen and Sun, Guoying and Kong, Jingde and Tian, Zhuotao and Su, Jingyong}, title = {SODA: Sensitivity-Oriented Dynamic Acceleration for Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33012-33021} }
ClusterMark: Towards Robust Watermarking for Autoregressive Image Generators with Visual Token Clustering: Denis Lukovnikov,

Andreas Müller,

Erwin Quiring,

Asja Fischer; [pdf] [supp]
[bibtex]
@InProceedings{Lukovnikov_2026_CVPR, author = {Lukovnikov, Denis and M{\"u}ller, Andreas and Quiring, Erwin and Fischer, Asja}, title = {ClusterMark: Towards Robust Watermarking for Autoregressive Image Generators with Visual Token Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9213-9222} }
PhysHO: Physics-Based Dynamic 3D Gaussian Human and Object from Monocular Video: Suyi Jiang,

Gim Hee Lee; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR, author = {Jiang, Suyi and Lee, Gim Hee}, title = {PhysHO: Physics-Based Dynamic 3D Gaussian Human and Object from Monocular Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32507-32517} }
CaST-Bench: Benchmarking Causal Chain-Grounded Spatio-Temporal Reasoning for Video Question Answering: Mingfang Zhang,

Jingjing Pan,

Ashutosh Kumar,

Rajat Saini,

Mustafa Erdogan,

Hsuan-Kung Yang,

Caixin Kang,

Yifei Huang,

Yoichi Sato,

Quan Kong; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Mingfang and Pan, Jingjing and Kumar, Ashutosh and Saini, Rajat and Erdogan, Mustafa and Yang, Hsuan-Kung and Kang, Caixin and Huang, Yifei and Sato, Yoichi and Kong, Quan}, title = {CaST-Bench: Benchmarking Causal Chain-Grounded Spatio-Temporal Reasoning for Video Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31856-31866} }
The Coherence Trap: When MLLM-Crafted Narratives Exploit Manipulated Visual Contexts: Yuchen Zhang,

Yaxiong Wang,

Yujiao Wu,

Lianwei Wu,

Li Zhu,

Zhedong Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yuchen and Wang, Yaxiong and Wu, Yujiao and Wu, Lianwei and Zhu, Li and Zheng, Zhedong}, title = {The Coherence Trap: When MLLM-Crafted Narratives Exploit Manipulated Visual Contexts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8760-8769} }
Generative Diffusion Priors for 3D Mapping of the Dark Universe: Brandon Zhao,

Diana Scognamiglio,

Olivier Doré,

Katherine L. Bouman; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Brandon and Scognamiglio, Diana and Dor{\'e}, Olivier and Bouman, Katherine L.}, title = {Generative Diffusion Priors for 3D Mapping of the Dark Universe}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23581-23590} }
PlanaReLoc: Camera Relocalization in 3D Planar Primitives via Region-Based Structure Matching: Hanqiao Ye,

Yuzhou Liu,

Yangdong Liu,

Shuhan Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR, author = {Ye, Hanqiao and Liu, Yuzhou and Liu, Yangdong and Shen, Shuhan}, title = {PlanaReLoc: Camera Relocalization in 3D Planar Primitives via Region-Based Structure Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26409-26421} }
Generalizable Knowledge Distillation from Vision Foundation Models for Semantic Segmentation: Chonghua Lv,

Dong Zhao,

Shuang Wang,

Dou Quan,

Ning Huyan,

Nicu Sebe,

Zhun Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2026_CVPR, author = {Lv, Chonghua and Zhao, Dong and Wang, Shuang and Quan, Dou and Huyan, Ning and Sebe, Nicu and Zhong, Zhun}, title = {Generalizable Knowledge Distillation from Vision Foundation Models for Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26656-26666} }
Coverage Optimization for Camera View Selection: Timothy Chen,

Adam Dai,

Maximilian Adang,

Grace Gao,

Mac Schwager; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Timothy and Dai, Adam and Adang, Maximilian and Gao, Grace and Schwager, Mac}, title = {Coverage Optimization for Camera View Selection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19443-19451} }
Customized Fusion: A Closed-Loop Dynamic Network for Adaptive Multi-Task-Aware Infrared-Visible Image Fusion: Zengyi Yang,

Yu Liu,

Juan Cheng,

Zhiqin Zhu,

Yafei Zhang,

Huafeng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Zengyi and Liu, Yu and Cheng, Juan and Zhu, Zhiqin and Zhang, Yafei and Li, Huafeng}, title = {Customized Fusion: A Closed-Loop Dynamic Network for Adaptive Multi-Task-Aware Infrared-Visible Image Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {188-198} }
StableMaterials: Enhancing Diversity in Material Generation via Semi-Supervised Learning: Giuseppe Vecchio; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vecchio_2026_CVPR, author = {Vecchio, Giuseppe}, title = {StableMaterials: Enhancing Diversity in Material Generation via Semi-Supervised Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19665-19675} }
MEMO: Human-like Crisp Edge Detection Using Masked Edge Prediction: Jiaxin Cheng,

Yue Wu,

Yicong Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR, author = {Cheng, Jiaxin and Wu, Yue and Zhou, Yicong}, title = {MEMO: Human-like Crisp Edge Detection Using Masked Edge Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27740-27749} }
TTRV: Test-Time Reinforcement Learning for Vision Language Models: Akshit Singh,

Shyam Marjit,

Wei Lin,

Paul Gavrikov,

Serena Yeung-Levy,

Hilde Kuehne,

Rogerio Feris,

Sivan Doveh,

James Glass,

M. Jehanzeb Mirza; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singh_2026_CVPR, author = {Singh, Akshit and Marjit, Shyam and Lin, Wei and Gavrikov, Paul and Yeung-Levy, Serena and Kuehne, Hilde and Feris, Rogerio and Doveh, Sivan and Glass, James and Mirza, M. Jehanzeb}, title = {TTRV: Test-Time Reinforcement Learning for Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33153-33163} }
CaT-GS: Efficient 3DGS Rendering for Large-Scale Scenes with Inter-frame Caching and Tile Scheduling: Tingjia Zhang,

Bo Chen,

Shengzhong Liu,

Fan Wu,

Guihai Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Tingjia and Chen, Bo and Liu, Shengzhong and Wu, Fan and Chen, Guihai},
  title     = {CaT-GS: Efficient 3DGS Rendering for Large-Scale Scenes with Inter-frame Caching and Tile Scheduling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {37391-37400}
}
FUSER: Feed-Forward Multiview 3D Registration Transformer and SE(3)$^N$ Diffusion Refinement: Haobo Jiang,

Jin Xie,

Jian Yang,

Liang Yu,

Jianmin Zheng; [pdf] [supp] [arXiv]
[bibtex]
% SE(3)^N is kept in a braced math group so the superscript typesets and title recasing leaves it intact.
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Haobo and Xie, Jin and Yang, Jian and Yu, Liang and Zheng, Jianmin},
  title     = {FUSER: Feed-Forward Multiview 3D Registration Transformer and {SE(3)$^N$} Diffusion Refinement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {7393-7403}
}
SpatialScore: Towards Comprehensive Evaluation for Spatial Intelligence: Haoning Wu,

Xiao Huang,

Yaohui Chen,

Ya Zhang,

Yanfeng Wang,

Weidi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Haoning and Huang, Xiao and Chen, Yaohui and Zhang, Ya and Wang, Yanfeng and Xie, Weidi},
  title     = {SpatialScore: Towards Comprehensive Evaluation for Spatial Intelligence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {31029-31041}
}
PointCSP: Cross-Sample Semantic Propagation and Stability Preservation in Self-Supervised Point Cloud Learning: Xinxing Yu,

Ajian Liu,

Sunyuan Qiang,

Hui Ma,

Liying Yang,

Yuzhong Wang,

Zhi Rao,

Yanyan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Xinxing and Liu, Ajian and Qiang, Sunyuan and Ma, Hui and Yang, Liying and Wang, Yuzhong and Rao, Zhi and Liang, Yanyan},
  title     = {PointCSP: Cross-Sample Semantic Propagation and Stability Preservation in Self-Supervised Point Cloud Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {10016-10026}
}
Enhance-then-Balance Modality Collaboration for Robust Multimodal Sentiment Analysis: Kang He,

Yuzhe Ding,

Xinrong Wang,

Fei Li,

Chong Teng,

Donghong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Kang and Ding, Yuzhe and Wang, Xinrong and Li, Fei and Teng, Chong and Ji, Donghong},
  title     = {Enhance-then-Balance Modality Collaboration for Robust Multimodal Sentiment Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {30183-30193}
}
SynMotion: Semantic-Visual Adaptation for Motion Customized Video Generation: Shuai Tan,

Biao Gong,

Yujie Wei,

Shiwei Zhang,

Zhuoxin Liu,

Ke Ma,

Yan Wang,

Kecheng Zheng,

Xing Zhu,

Yujun Shen,

Hengshuang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Shuai and Gong, Biao and Wei, Yujie and Zhang, Shiwei and Liu, Zhuoxin and Ma, Ke and Wang, Yan and Zheng, Kecheng and Zhu, Xing and Shen, Yujun and Zhao, Hengshuang},
  title     = {SynMotion: Semantic-Visual Adaptation for Motion Customized Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {30477-30489}
}
UniSER: A Foundation Model for Unified Soft Effects Removal: Jingdong Zhang,

Lingzhi Zhang,

Qing Liu,

Mang Tik Chiu,

Connelly Barnes,

Yizhou Wang,

Haoran You,

Xiaoyang Liu,

Yuqian Zhou,

Zhe Lin,

Eli Shechtman,

Sohrab Amirghodsi,

Xin Li,

Wenping Wang,

Xiaohang Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jingdong and Zhang, Lingzhi and Liu, Qing and Chiu, Mang Tik and Barnes, Connelly and Wang, Yizhou and You, Haoran and Liu, Xiaoyang and Zhou, Yuqian and Lin, Zhe and Shechtman, Eli and Amirghodsi, Sohrab and Li, Xin and Wang, Wenping and Zhan, Xiaohang},
  title     = {UniSER: A Foundation Model for Unified Soft Effects Removal},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {16096-16107}
}
EvObj: Learning Evolving Object-centric Representations for 3D Instance Segmentation without Scene Supervision: Jiahao Chen,

Zihui Zhang,

Yafei Yang,

Jinxi Li,

Shenxing Wei,

Zhixuan Sun,

Bo Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jiahao and Zhang, Zihui and Yang, Yafei and Li, Jinxi and Wei, Shenxing and Sun, Zhixuan and Yang, Bo},
  title     = {EvObj: Learning Evolving Object-centric Representations for 3D Instance Segmentation without Scene Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {39817-39826}
}
Joint Learning of General and Diverse Patterns with Mixture of Memory Experts for Weakly-Supervised Video Anomaly Detection: Bo Sun,

Junxi Chen,

Zhe Wu,

Feng Gao,

Fan Yang,

Li Su,

Yaowei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Bo and Chen, Junxi and Wu, Zhe and Gao, Feng and Yang, Fan and Su, Li and Wang, Yaowei},
  title     = {Joint Learning of General and Diverse Patterns with Mixture of Memory Experts for Weakly-Supervised Video Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {35638-35647}
}
Efficient Frame Selection for Long Video Understanding via Reinforcement Learning: Yaxuan Qin,

Hefei Li,

Wenqi Mu,

Yancheng He; [pdf] [supp]
[bibtex]
@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Yaxuan and Li, Hefei and Mu, Wenqi and He, Yancheng},
  title     = {Efficient Frame Selection for Long Video Understanding via Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {16944-16953}
}
Mask to Align, Weight to Disambiguate: Reliable Unsupervised Cross-Modal Hashing with Masked-Weight Contrast: Fan Yang,

Yuanzhi Zhao,

Haimei Zhao,

Yudong Zhao,

Haikun Xu; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Fan and Zhao, Yuanzhi and Zhao, Haimei and Zhao, Yudong and Xu, Haikun},
  title     = {Mask to Align, Weight to Disambiguate: Reliable Unsupervised Cross-Modal Hashing with Masked-Weight Contrast},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {30151-30161}
}
Neighbor GRPO: Contrastive ODE Policy Optimization Aligns Flow Models: Dailan He,

Guanlin Feng,

Xingtong Ge,

Yazhe Niu,

Yi Zhang,

Bingqi Ma,

Guanglu Song,

Yu Liu,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Dailan and Feng, Guanlin and Ge, Xingtong and Niu, Yazhe and Zhang, Yi and Ma, Bingqi and Song, Guanglu and Liu, Yu and Li, Hongsheng},
  title     = {Neighbor GRPO: Contrastive ODE Policy Optimization Aligns Flow Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {6033-6042}
}
CustomTex: High-fidelity Indoor Scene Texturing via Multi-Reference Customization: Weilin Chen,

Jiahao Rao,

Wenhao Wang,

Xinyang Li,

Xuan Cheng,

Liujuan Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Weilin and Rao, Jiahao and Wang, Wenhao and Li, Xinyang and Cheng, Xuan and Cao, Liujuan},
  title     = {CustomTex: High-fidelity Indoor Scene Texturing via Multi-Reference Customization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {4280-4290}
}
PIX-TAB: Efficient PIXel-Precise TABle Structure Recognition Approach with Speculative Decoding and Region-Based Image Segmentation: Viktor Zaytsev,

Olena Vynokurova,

Pavlo Tytarchuk,

Dmytro Kozii,

Vitalii Pohribnyi,

Olga Radyvonenko,

Artem Shcherbina; [pdf] [supp]
[bibtex]
@InProceedings{Zaytsev_2026_CVPR,
  author    = {Zaytsev, Viktor and Vynokurova, Olena and Tytarchuk, Pavlo and Kozii, Dmytro and Pohribnyi, Vitalii and Radyvonenko, Olga and Shcherbina, Artem},
  title     = {PIX-TAB: Efficient PIXel-Precise TABle Structure Recognition Approach with Speculative Decoding and Region-Based Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {23912-23921}
}
SeeGroup: Multi-Layer Depth Estimation of Transparent Surfaces via Self-Determined Grouping: Hongyu Wen,

Jia Deng; [pdf] [supp]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Hongyu and Deng, Jia},
  title     = {SeeGroup: Multi-Layer Depth Estimation of Transparent Surfaces via Self-Determined Grouping},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {7299-7309}
}
Any2Any 3D Diffusion Models with Knowledge Transfer: A Radiotherapy Planning Study: Yuhan Wang,

Zihan Li,

Han Liu,

Simon Arberet,

Martin Kraus,

Yuyin Zhou,

Florin-Cristian Ghesu,

Dorin Comaniciu,

Ali Kamen,

Riqiang Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuhan and Li, Zihan and Liu, Han and Arberet, Simon and Kraus, Martin and Zhou, Yuyin and Ghesu, Florin-Cristian and Comaniciu, Dorin and Kamen, Ali and Gao, Riqiang},
  title     = {Any2Any 3D Diffusion Models with Knowledge Transfer: A Radiotherapy Planning Study},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {16520-16530}
}
CrossVL: Complexity-Aware Feature Routing and Paired Curriculum for Cross-View Vision-Language Detection: Zhipeng Liu,

Chunbo Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zhipeng and Luo, Chunbo},
  title     = {CrossVL: Complexity-Aware Feature Routing and Paired Curriculum for Cross-View Vision-Language Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {10116-10125}
}
Grounding Everything in Tokens for Multimodal Large Language Models: Xiangxuan Ren,

Zhongdao Wang,

Liping Hou,

Pin Tang,

Guoqing Wang,

Chao Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Xiangxuan and Wang, Zhongdao and Hou, Liping and Tang, Pin and Wang, Guoqing and Ma, Chao},
  title     = {Grounding Everything in Tokens for Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {41171-41181}
}
GFRRN: Explore the Gaps in Single Image Reflection Removal: Yu Chen,

Zewei He,

Xingyu Liu,

Zixuan Chen,

Zhe-Ming Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yu and He, Zewei and Liu, Xingyu and Chen, Zixuan and Lu, Zhe-Ming},
  title     = {GFRRN: Explore the Gaps in Single Image Reflection Removal},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {5690-5699}
}
VLM-PTQ: Efficient Post-Training Quantization for Large Vision-Language Models: Juncan Deng,

Kejie Huang; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Juncan and Huang, Kejie},
  title     = {VLM-PTQ: Efficient Post-Training Quantization for Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {24696-24705}
}
MoVieS: Motion-Aware 4D Dynamic View Synthesis in One Second: Chenguo Lin,

Yuchen Lin,

Panwang Pan,

Yifan Yu,

Tao Hu,

Honglei Yan,

Katerina Fragkiadaki,

Yadong Mu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Chenguo and Lin, Yuchen and Pan, Panwang and Yu, Yifan and Hu, Tao and Yan, Honglei and Fragkiadaki, Katerina and Mu, Yadong},
  title     = {MoVieS: Motion-Aware 4D Dynamic View Synthesis in One Second},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {295-306}
}
Affostruction: 3D Affordance Grounding with Generative Reconstruction: Chunghyun Park,

Seunghyeon Lee,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Chunghyun and Lee, Seunghyeon and Cho, Minsu},
  title     = {Affostruction: 3D Affordance Grounding with Generative Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {7435-7445}
}
PointCNN++: Performant Convolution on Native Points: Lihan Li,

Haofeng Zhong,

Rui Bu,

Mingchao Sun,

Wenzheng Chen,

Baoquan Chen,

Yangyan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Lihan and Zhong, Haofeng and Bu, Rui and Sun, Mingchao and Chen, Wenzheng and Chen, Baoquan and Li, Yangyan},
  title     = {PointCNN++: Performant Convolution on Native Points},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {24151-24161}
}
Mind the Generative Details: Direct Localized Detail Preference Optimization for Video Diffusion Models: Zitong Huang,

Kaidong Zhang,

Yukang Ding,

Chao Gao,

Rui Ding,

Ying Chen,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zitong and Zhang, Kaidong and Ding, Yukang and Gao, Chao and Ding, Rui and Chen, Ying and Zuo, Wangmeng},
  title     = {Mind the Generative Details: Direct Localized Detail Preference Optimization for Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {35998-36008}
}
High-Fidelity Diffusion Face Swapping with ID-Constrained Facial Conditioning: Dailan He,

Xiahong Wang,

Shulun Wang,

Hao Shao,

Bingqi Ma,

Guanglu Song,

Yu Liu,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Dailan and Wang, Xiahong and Wang, Shulun and Shao, Hao and Ma, Bingqi and Song, Guanglu and Liu, Yu and Li, Hongsheng},
  title     = {High-Fidelity Diffusion Face Swapping with ID-Constrained Facial Conditioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {25767-25776}
}
Is Parameter Isolation Better for Prompt-Based Continual Learning?: Jiangyang Li,

Chenhao Ding,

SongLin Dong,

Qiang Wang,

Jianchao Zhao,

Yuhang He,

Yihong Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiangyang and Ding, Chenhao and Dong, SongLin and Wang, Qiang and Zhao, Jianchao and He, Yuhang and Gong, Yihong},
  title     = {Is Parameter Isolation Better for Prompt-Based Continual Learning?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {3887-3897}
}
Weakly Supervised Video Anomaly Detection with Anomaly-Connected Components and Intention Reasoning: Yu Wang,

Shengjie Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yu and Zhao, Shengjie},
  title     = {Weakly Supervised Video Anomaly Detection with Anomaly-Connected Components and Intention Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {28545-28556}
}
Duala: Dual-Level Alignment of Subjects and Stimuli for Cross-Subject fMRI Decoding: Shumeng Li,

Jintao Guo,

Jian Zhang,

Yulin Zhou,

Luyang Cao,

Yinghuan Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Shumeng and Guo, Jintao and Zhang, Jian and Zhou, Yulin and Cao, Luyang and Shi, Yinghuan},
  title     = {Duala: Dual-Level Alignment of Subjects and Stimuli for Cross-Subject fMRI Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {42722-42731}
}
MatPedia: A Universal Generative Foundation for High-Fidelity Material Synthesis: Di Luo,

Shuhui Yang,

Mingxin Yang,

Jiawei Lu,

Yixuan Tang,

Xintong Han,

Zhuo Chen,

Beibei Wang,

Chunchao Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Di and Yang, Shuhui and Yang, Mingxin and Lu, Jiawei and Tang, Yixuan and Han, Xintong and Chen, Zhuo and Wang, Beibei and Guo, Chunchao},
  title     = {MatPedia: A Universal Generative Foundation for High-Fidelity Material Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {8943-8953}
}
Affordance Field Intervention: Enabling VLAs to Escape Memory Traps in Robotic Manipulation: Siyu Xu,

Zijian Wang,

Yunke Wang,

Chenghao Xia,

Tao Huang,

Chang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Siyu and Wang, Zijian and Wang, Yunke and Xia, Chenghao and Huang, Tao and Xu, Chang},
  title     = {Affordance Field Intervention: Enabling VLAs to Escape Memory Traps in Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {37206-37215}
}
CSF: Black-box Fingerprinting via Compositional Semantics for Text-to-Image Models: Junhoo Lee,

Mijin Koo,

Nojun Kwak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Junhoo and Koo, Mijin and Kwak, Nojun},
  title     = {CSF: Black-box Fingerprinting via Compositional Semantics for Text-to-Image Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {16572-16582}
}
CAR-SAM: Cross-Attention Reconstruction for Post-Training Quantization of the Segment Anything Model: Houji Wen,

Jiangyong Yu,

Dawei Yang,

Jun Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Houji and Yu, Jiangyong and Yang, Dawei and Li, Jun},
  title     = {CAR-SAM: Cross-Attention Reconstruction for Post-Training Quantization of the Segment Anything Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {33632-33641}
}
Vision Transformers Need More Than Registers: Cheng Shi,

Yizhou Yu,

Sibei Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Cheng and Yu, Yizhou and Yang, Sibei},
  title     = {Vision Transformers Need More Than Registers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {26328-26337}
}
Evidential Neural Radiance Fields: Ruxiao Duan,

Alex Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Duan_2026_CVPR,
  author    = {Duan, Ruxiao and Wong, Alex},
  title     = {Evidential Neural Radiance Fields},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {28632-28641}
}
ThinkingViT: Matryoshka Thinking Vision Transformer for Elastic Inference: Ali Hojjat,

Janek Haberer,

Sören Pirk,

Olaf Landsiedel; [pdf] [supp] [arXiv]
[bibtex]
% The umlaut in the third author's given name is written as a brace group so it is handled as a single accented letter.
@InProceedings{Hojjat_2026_CVPR,
  author    = {Hojjat, Ali and Haberer, Janek and Pirk, S{\"o}ren and Landsiedel, Olaf},
  title     = {ThinkingViT: Matryoshka Thinking Vision Transformer for Elastic Inference},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {41923-41933}
}
Progress by Pieces: Test-Time Scaling for Autoregressive Image Generation: Joonhyung Park,

Hyeongwon Jang,

Joowon Kim,

Eunho Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Joonhyung and Jang, Hyeongwon and Kim, Joowon and Yang, Eunho},
  title     = {Progress by Pieces: Test-Time Scaling for Autoregressive Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {38091-38100}
}
Cluster-Aware Neural Collapse Prompt Tuning for Long-Tailed Generalization of Vision-Language Models: Boyang Guo,

Liang Li,

Lin Peng,

Yuhan Gao,

Xichun Sheng,

Chenggang Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Boyang and Li, Liang and Peng, Lin and Gao, Yuhan and Sheng, Xichun and Yan, Chenggang},
  title     = {Cluster-Aware Neural Collapse Prompt Tuning for Long-Tailed Generalization of Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {3122-3132}
}
Prompt Yourself: Awakening Textual Semantics in 1D Visual Tokenizers: Hualiang Wang,

Siming Fu,

Weinan Jia,

Yuning Lu,

Mu Liu,

Jidong Jiang,

Xiaomeng Li; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Hualiang and Fu, Siming and Jia, Weinan and Lu, Yuning and Liu, Mu and Jiang, Jidong and Li, Xiaomeng},
  title     = {Prompt Yourself: Awakening Textual Semantics in 1D Visual Tokenizers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {14864-14874}
}
Gaussian-Mixture Latent Flow for Stochastic 3D Human Motion Prediction: Yue Ma,

Frederick W. B. Li,

Xiaohui Liang; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Yue and Li, Frederick W. B. and Liang, Xiaohui},
  title     = {Gaussian-Mixture Latent Flow for Stochastic 3D Human Motion Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {7131-7141}
}
DreamStyle: A Unified Framework for Video Stylization: Mengtian Li,

Jinshu Chen,

Songtao Zhao,

Wanquan Feng,

Pengqi Tu,

Qian He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Mengtian and Chen, Jinshu and Zhao, Songtao and Feng, Wanquan and Tu, Pengqi and He, Qian},
  title     = {DreamStyle: A Unified Framework for Video Stylization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {36019-36029}
}
Uncertainty-Aware Modality Fusion for Unaligned RGB-T Salient Object Detection: Mianzhao Wang,

Fan Shi,

Xu Cheng,

Chen Jia,

Shengyong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Mianzhao and Shi, Fan and Cheng, Xu and Jia, Chen and Chen, Shengyong},
  title     = {Uncertainty-Aware Modality Fusion for Unaligned RGB-T Salient Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {41489-41498}
}
MD2E: Modeling Depth-to-Edge Cues for Monocular Metric Depth Estimation: Chao Ning,

Minghe Shen,

Naoto Yokoya; [pdf] [supp]
[bibtex]
@InProceedings{Ning_2026_CVPR,
  author    = {Ning, Chao and Shen, Minghe and Yokoya, Naoto},
  title     = {MD2E: Modeling Depth-to-Edge Cues for Monocular Metric Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {5772-5782}
}
Texvent: Asynchronous Event Data Simulation via Text Prompt: Ruofei Wang,

Peiqi Duan,

Ka Chun Cheung,

Simon See,

Boxin Shi,

Renjie Wan; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ruofei and Duan, Peiqi and Cheung, Ka Chun and See, Simon and Shi, Boxin and Wan, Renjie},
  title     = {Texvent: Asynchronous Event Data Simulation via Text Prompt},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {36375-36384}
}
GenSplat: Bridging the Generalization Gap in 3DGS Language Comprehension: Fang Liu,

Yuhao Liu,

Ke Xu,

Gerhard Petrus Hancke,

Rynson W. H. Lau; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Fang and Liu, Yuhao and Xu, Ke and Hancke, Gerhard Petrus and Lau, Rynson W. H.},
  title     = {GenSplat: Bridging the Generalization Gap in 3DGS Language Comprehension},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {5221-5231}
}
Coupled Diffusion Sampling for Training-Free Multi-View Image Editing: Hadi Alzayer,

Yunzhi Zhang,

Chen Geng,

Jia-Bin Huang,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alzayer_2026_CVPR,
  author    = {Alzayer, Hadi and Zhang, Yunzhi and Geng, Chen and Huang, Jia-Bin and Wu, Jiajun},
  title     = {Coupled Diffusion Sampling for Training-Free Multi-View Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {43686-43696}
}
Video Panels for Long Video Understanding: Lars Doorenbos,

Federico Spurio,

Juergen Gall; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Doorenbos_2026_CVPR,
  author    = {Doorenbos, Lars and Spurio, Federico and Gall, Juergen},
  title     = {Video Panels for Long Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {31293-31303}
}
PhyGaP: Physically-Grounded Gaussians with Polarization Cues: Jiale Wu,

Xiaoyang Bai,

Zongqi He,

Weiwei Xu,

Yifan Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Jiale and Bai, Xiaoyang and He, Zongqi and Xu, Weiwei and Peng, Yifan},
  title     = {PhyGaP: Physically-Grounded Gaussians with Polarization Cues},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {7278-7288}
}
Distilling Balanced Knowledge from a Biased Teacher: Seonghak Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Seonghak},
  title     = {Distilling Balanced Knowledge from a Biased Teacher},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {18032-18041}
}
Learning Transferable Temporal Primitives for Video Reasoning via Synthetic Videos: Songtao Jiang,

Sibo Song,

Chenyi Zhou,

Yuan Wang,

Ruizhe Chen,

Tongkun Guan,

Ruilin Luo,

Yan Zhang,

Zhihang Tang,

Yuchong Sun,

Hang Zhang,

Zhibo Yang,

Shuai Bai,

Junyang Lin,

Zuozhu Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Songtao and Song, Sibo and Zhou, Chenyi and Wang, Yuan and Chen, Ruizhe and Guan, Tongkun and Luo, Ruilin and Zhang, Yan and Tang, Zhihang and Sun, Yuchong and Zhang, Hang and Yang, Zhibo and Bai, Shuai and Lin, Junyang and Liu, Zuozhu},
  title     = {Learning Transferable Temporal Primitives for Video Reasoning via Synthetic Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {31283-31292}
}
Omni-3DEdit: Generalized Versatile 3D Editing in One-Pass: Liyi Chen,

Pengfei Wang,

Guowen Zhang,

Zhiyuan Ma,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Liyi and Wang, Pengfei and Zhang, Guowen and Ma, Zhiyuan and Zhang, Lei},
  title     = {Omni-3DEdit: Generalized Versatile 3D Editing in One-Pass},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {12640-12650}
}
UnReflectAnything: RGB-Only Highlight Removal by Rendering Synthetic Specular Supervision: Alberto Rota,

Mert Kiray,

Mert Asim Karaoglu,

Patrick Ruhkamp,

Elena De Momi,

Nassir Navab,

Benjamin Busam; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rota_2026_CVPR,
  author    = {Rota, Alberto and Kiray, Mert and Karaoglu, Mert Asim and Ruhkamp, Patrick and De Momi, Elena and Navab, Nassir and Busam, Benjamin},
  title     = {UnReflectAnything: RGB-Only Highlight Removal by Rendering Synthetic Specular Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {241-250}
}
Understanding, Accelerating, and Improving MeanFlow Training: Jin-Young Kim,

Hyojun Go,

Lea Bogensperger,

Julius Erbach,

Nikolai Kalischek,

Federico Tombari,

Konrad Schindler,

Dominik Narnhofer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jin-Young and Go, Hyojun and Bogensperger, Lea and Erbach, Julius and Kalischek, Nikolai and Tombari, Federico and Schindler, Konrad and Narnhofer, Dominik},
  title     = {Understanding, Accelerating, and Improving MeanFlow Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {37992-38003}
}
FOZO: Forward-Only Zeroth-Order Prompt Optimization for Test-Time Adaptation: Xingyu Wang,

Tao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xingyu and Wang, Tao},
  title     = {FOZO: Forward-Only Zeroth-Order Prompt Optimization for Test-Time Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {7936-7945}
}
VisualOverload: Probing Visual Understanding of VLMs in Really Dense Scenes: Paul Gavrikov,

Wei Lin,

M. Jehanzeb Mirza,

Soumya Jahagirdar,

Muhammad Huzaifa,

Sivan Doveh,

James Glass,

Serena Yeung-Levy,

Hilde Kuehne; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gavrikov_2026_CVPR,
  author    = {Gavrikov, Paul and Lin, Wei and Mirza, M. Jehanzeb and Jahagirdar, Soumya and Huzaifa, Muhammad and Doveh, Sivan and Glass, James and Yeung-Levy, Serena and Kuehne, Hilde},
  title     = {VisualOverload: Probing Visual Understanding of VLMs in Really Dense Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {40833-40844}
}
STRNet: Visual Navigation with Spatio-Temporal Representation through Dynamic Graph Aggregation: Hao Ren,

Zetong Bi,

Yiming Zeng,

Zhaoliang Wan,

Lu Qi,

Hui Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Hao and Bi, Zetong and Zeng, Yiming and Wan, Zhaoliang and Qi, Lu and Cheng, Hui},
  title     = {STRNet: Visual Navigation with Spatio-Temporal Representation through Dynamic Graph Aggregation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = {June},
  year      = {2026},
  pages     = {42464-42473}
}
AffordMatcher: Affordance Learning in 3D Scenes from Visual Signifiers: Nghia Vu,

Tuong Do,

Khang Nguyen,

Baoru Huang,

Nhat Le,

Binh Xuan Nguyen,

Erman Tjiputra,

Quang D. Tran,

Ravi Prakash,

Te-Chuan Chiu,

Anh Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vu_2026_CVPR, author = {Vu, Nghia and Do, Tuong and Nguyen, Khang and Huang, Baoru and Le, Nhat and Nguyen, Binh Xuan and Tjiputra, Erman and Tran, Quang D. and Prakash, Ravi and Chiu, Te-Chuan and Nguyen, Anh}, title = {AffordMatcher: Affordance Learning in 3D Scenes from Visual Signifiers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2635-2644} }
From 2D Alignment to 3D Plausibility: Unifying Heterogeneous 2D Priors and Penetration-Free Diffusion for Occlusion-Robust Two-Hand Reconstruction: Gaoge Han,

Yongkang Cheng,

Zhe Chen,

Shaoli Huang,

Tongliang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR, author = {Han, Gaoge and Cheng, Yongkang and Chen, Zhe and Huang, Shaoli and Liu, Tongliang}, title = {From 2D Alignment to 3D Plausibility: Unifying Heterogeneous 2D Priors and Penetration-Free Diffusion for Occlusion-Robust Two-Hand Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42800-42809} }
Language-Grounded Decoupled Action Representation for Robotic Manipulation: Wuding Weng,

Tongshu Wu,

Liucheng Chen,

Siyu Xie,

Zheng Wang,

Xing Xu,

Jingkuan Song,

Heng Tao Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Weng_2026_CVPR, author = {Weng, Wuding and Wu, Tongshu and Chen, Liucheng and Xie, Siyu and Wang, Zheng and Xu, Xing and Song, Jingkuan and Shen, Heng Tao}, title = {Language-Grounded Decoupled Action Representation for Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6770-6780} }
Parameterized Prompt for Incremental Object Detection: Zijia An,

Boyu Diao,

Ruiqi Liu,

Libo Huang,

Chuanguang Yang,

Fei Wang,

Zhulin An,

Yongjun Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2026_CVPR, author = {An, Zijia and Diao, Boyu and Liu, Ruiqi and Huang, Libo and Yang, Chuanguang and Wang, Fei and An, Zhulin and Xu, Yongjun}, title = {Parameterized Prompt for Incremental Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27600-27610} }
PixelDiT: Pixel Diffusion Transformers for Image Generation: Yongsheng Yu,

Wei Xiong,

Weili Nie,

Yichen Sheng,

Shiqiu Liu,

Jiebo Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR, author = {Yu, Yongsheng and Xiong, Wei and Nie, Weili and Sheng, Yichen and Liu, Shiqiu and Luo, Jiebo}, title = {PixelDiT: Pixel Diffusion Transformers for Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14273-14282} }
Video-CoE: Reinforcing Video Event Prediction via Chain of Events: Qile Su,

Jing Tang,

Rui Chen,

Lei Sun,

Xiangxiang Chu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2026_CVPR, author = {Su, Qile and Tang, Jing and Chen, Rui and Sun, Lei and Chu, Xiangxiang}, title = {Video-CoE: Reinforcing Video Event Prediction via Chain of Events}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32818-32828} }
FiDeSR: High-Fidelity and Detail-Preserving One-Step Diffusion Super-Resolution: Aro Kim,

Myeongjin Jang,

Chaewon Moon,

Youngjin Shin,

Jinwoo Jeong,

Sang-hyo Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Aro and Jang, Myeongjin and Moon, Chaewon and Shin, Youngjin and Jeong, Jinwoo and Park, Sang-hyo}, title = {FiDeSR: High-Fidelity and Detail-Preserving One-Step Diffusion Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38270-38280} }
QuantVLA: Scale-Calibrated Post-Training Quantization for Vision-Language-Action Models: Jingxuan Zhang,

Yunta Hsieh,

Zhongwei Wan,

Haokun Lin,

Xin Wang,

Ziqi Wang,

Yingtie Lei,

Mi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jingxuan and Hsieh, Yunta and Wan, Zhongwei and Lin, Haokun and Wang, Xin and Wang, Ziqi and Lei, Yingtie and Zhang, Mi}, title = {QuantVLA: Scale-Calibrated Post-Training Quantization for Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39539-39549} }
BrepGaussian: CAD reconstruction from Multi-View Images with Gaussian Splatting: Jiaxing Yu,

Dongyang Ren,

Hangyu Xu,

Zhouyuxiao Yang,

Yuanqi Li,

Jie Guo,

Zhengkang Zhou,

Yanwen Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR, author = {Yu, Jiaxing and Ren, Dongyang and Xu, Hangyu and Yang, Zhouyuxiao and Li, Yuanqi and Guo, Jie and Zhou, Zhengkang and Guo, Yanwen}, title = {BrepGaussian: CAD reconstruction from Multi-View Images with Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26104-26113} }
HiF-VLA: Hindsight, Insight and Foresight through Motion Representation for Vision-Language-Action Models: Minghui Lin,

Pengxiang Ding,

Shu Wang,

Zifeng Zhuang,

Yang Liu,

Xinyang Tong,

Wenxuan Song,

Shangke Lyu,

Siteng Huang,

Donglin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR, author = {Lin, Minghui and Ding, Pengxiang and Wang, Shu and Zhuang, Zifeng and Liu, Yang and Tong, Xinyang and Song, Wenxuan and Lyu, Shangke and Huang, Siteng and Wang, Donglin}, title = {HiF-VLA: Hindsight, Insight and Foresight through Motion Representation for Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20732-20742} }
VLM4RSDet: Collaborative Optimization with Vision-Language Model for Enhancing Remote Sensing Object Detection: Shuohao Shi,

Qiang Fang,

Xin Xu; [pdf]
[bibtex]
@InProceedings{Shi_2026_CVPR, author = {Shi, Shuohao and Fang, Qiang and Xu, Xin}, title = {VLM4RSDet: Collaborative Optimization with Vision-Language Model for Enhancing Remote Sensing Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18450-18460} }
Exact-GS: Mathematically Rigorous and Accurate 3D Gaussian Splatting for 3D X-ray Reconstruction: Guangpu Yang,

Steffen Kieß,

Hanxiang Luo,

Xingyu Liu,

Sven Simon; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Guangpu and Kie{\ss}, Steffen and Luo, Hanxiang and Liu, Xingyu and Simon, Sven}, title = {Exact-GS: Mathematically Rigorous and Accurate 3D Gaussian Splatting for 3D X-ray Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4902-4911} }
SDGS: Spatial Difference Guided Gaussian Splatting for Simultaneous Localization and 3D Reconstruction: Yijian Tian,

Mingtao Ou,

Zijian Pan,

Xinglong Ji; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2026_CVPR, author = {Tian, Yijian and Ou, Mingtao and Pan, Zijian and Ji, Xinglong}, title = {SDGS: Spatial Difference Guided Gaussian Splatting for Simultaneous Localization and 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4860-4869} }
Global Structure-from-Motion Meets Feedforward Reconstruction: Linfei Pan,

Johannes Schönberger,

Marc Pollefeys; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2026_CVPR, author = {Pan, Linfei and Sch{\"o}nberger, Johannes and Pollefeys, Marc}, title = {Global Structure-from-Motion Meets Feedforward Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21880-21890} }
TUNA: Taming Unified Visual Representations for Native Unified Multimodal Models: Zhiheng Liu,

Weiming Ren,

Haozhe Liu,

Zijian Zhou,

Shoufa Chen,

Haonan Qiu,

Xiaoke Huang,

Zhaochong An,

Fanny Yang,

Aditya Patel,

Viktar Atliha,

Tony Ng,

Xiao Han,

Chuyan Zhu,

Chenyang Zhang,

Ding Liu,

Juan-Manuel Perez-Rua,

Sen He,

Jürgen Schmidhuber,

Wenhu Chen,

Ping Luo,

Wei Liu,

Tao Xiang,

Jonas Schult,

Yuren Cong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Zhiheng and Ren, Weiming and Liu, Haozhe and Zhou, Zijian and Chen, Shoufa and Qiu, Haonan and Huang, Xiaoke and An, Zhaochong and Yang, Fanny and Patel, Aditya and Atliha, Viktar and Ng, Tony and Han, Xiao and Zhu, Chuyan and Zhang, Chenyang and Liu, Ding and Perez-Rua, Juan-Manuel and He, Sen and Schmidhuber, J{\"u}rgen and Chen, Wenhu and Luo, Ping and Liu, Wei and Xiang, Tao and Schult, Jonas and Cong, Yuren}, title = {TUNA: Taming Unified Visual Representations for Native Unified Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15740-15751} }
Compositional Transformation Reasoning for Composed Video Retrieval: Sihong Huang,

Jiaxin Wu,

Dongmei Jiang,

Yi Cai,

Yaowei Wang,

Xiaoyong Wei; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR, author = {Huang, Sihong and Wu, Jiaxin and Jiang, Dongmei and Cai, Yi and Wang, Yaowei and Wei, Xiaoyong}, title = {Compositional Transformation Reasoning for Composed Video Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25644-25653} }
ImmerIris: A Large-Scale Dataset and Benchmark for Off-Axis and Unconstrained Iris Recognition in Immersive Applications: Yuxi Mi,

Qiuyang Yuan,

Zhizhou Zhong,

Xuan Zhao,

Jiaogen Zhou,

Fubao Zhu,

Jihong Guan,

Shuigeng Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mi_2026_CVPR, author = {Mi, Yuxi and Yuan, Qiuyang and Zhong, Zhizhou and Zhao, Xuan and Zhou, Jiaogen and Zhu, Fubao and Guan, Jihong and Zhou, Shuigeng}, title = {ImmerIris: A Large-Scale Dataset and Benchmark for Off-Axis and Unconstrained Iris Recognition in Immersive Applications}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28838-28847} }
Foundation Model Priors Enhance Object Focus in Feature Space for Source-Free Object Detection: Sairam VCR,

Rishabh Lalla,

Aveen Dayal,

Tejal Kulkarni,

Anuj Lalla,

Vineeth N. Balasubramanian,

Muhammad Haris Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{VCR_2026_CVPR, author = {VCR, Sairam and Lalla, Rishabh and Dayal, Aveen and Kulkarni, Tejal and Lalla, Anuj and Balasubramanian, Vineeth N. and Khan, Muhammad Haris}, title = {Foundation Model Priors Enhance Object Focus in Feature Space for Source-Free Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29431-29440} }
ShreddingNet: Coarse-to-Fine Restoration for Multi-Source Shredded Manuscripts: Haoyang Cui,

Hao Jiang,

Yadong Mu; [pdf] [supp]
[bibtex]
@InProceedings{Cui_2026_CVPR, author = {Cui, Haoyang and Jiang, Hao and Mu, Yadong}, title = {ShreddingNet: Coarse-to-Fine Restoration for Multi-Source Shredded Manuscripts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8247-8256} }
Reading or Reasoning? Format Decoupled Reinforcement Learning for Document OCR: Yufeng Zhong,

Lei Chen,

Zhixiong Zeng,

Xuanle Zhao,

Deyang Jiang,

Liming Zheng,

Jing Huang,

Haibo Qiu,

Peng Shi,

Siqi Yang,

Lin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2026_CVPR, author = {Zhong, Yufeng and Chen, Lei and Zeng, Zhixiong and Zhao, Xuanle and Jiang, Deyang and Zheng, Liming and Huang, Jing and Qiu, Haibo and Shi, Peng and Yang, Siqi and Ma, Lin}, title = {Reading or Reasoning? Format Decoupled Reinforcement Learning for Document OCR}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33164-33173} }
IGen: Scalable Data Generation for Robot Learning from Open-World Images: Chenghao Gu,

Haolan Kang,

Junchao Lin,

Jinghe Wang,

Duo Wu,

Shuzhao Xie,

Fanding Huang,

Junchen Ge,

Ziyang Gong,

Letian Li,

Hongying Zheng,

Changwei Lv,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR, author = {Gu, Chenghao and Kang, Haolan and Lin, Junchao and Wang, Jinghe and Wu, Duo and Xie, Shuzhao and Huang, Fanding and Ge, Junchen and Gong, Ziyang and Li, Letian and Zheng, Hongying and Lv, Changwei and Wang, Zhi}, title = {IGen: Scalable Data Generation for Robot Learning from Open-World Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28111-28122} }
RegFormer: Transferable Relational Grounding for Efficient Weakly-Supervised Human-Object Interaction Detection: Jihwan Park,

Chanhyeong Yang,

Jinyoung Park,

Taehoon Song,

Hyunwoo J. Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR, author = {Park, Jihwan and Yang, Chanhyeong and Park, Jinyoung and Song, Taehoon and Kim, Hyunwoo J.}, title = {RegFormer: Transferable Relational Grounding for Efficient Weakly-Supervised Human-Object Interaction Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10387-10396} }
MotionScale: Reconstructing Appearance, Geometry, and Motion of Dynamic Scenes with Scalable 4D Gaussian Splatting: Haoran Zhou,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR, author = {Zhou, Haoran and Lee, Gim Hee}, title = {MotionScale: Reconstructing Appearance, Geometry, and Motion of Dynamic Scenes with Scalable 4D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11860-11870} }
$\phi$-DPO: Fairness Direct Preference Optimization Approach to Continual Learning in Large Multimodal Models: Thanh-Dat Truong,

Huu-Thien Tran,

Jackson Cothren,

Bhiksha Raj,

Khoa Luu; [pdf] [supp]
[bibtex]
@InProceedings{Truong_2026_CVPR, author = {Truong, Thanh-Dat and Tran, Huu-Thien and Cothren, Jackson and Raj, Bhiksha and Luu, Khoa}, title = {{$\phi$}-DPO: Fairness Direct Preference Optimization Approach to Continual Learning in Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39922-39934} }
PhyCo: Learning Controllable Physical Priors for Generative Motion: Sriram Narayanan,

Ziyu Jiang,

Srinivasa Narasimhan,

Manmohan Chandraker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Narayanan_2026_CVPR, author = {Narayanan, Sriram and Jiang, Ziyu and Narasimhan, Srinivasa and Chandraker, Manmohan}, title = {PhyCo: Learning Controllable Physical Priors for Generative Motion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41892-41902} }
Better than Average: Spatially-Aware Aggregation of Segmentation Uncertainty Improves Downstream Performance: Vanessa Emanuela Guarino,

Claudia Winklmayr,

Jannik Franzen,

Josef Lorenz Rumberger,

Manuel Pfeuffer,

Sonja Greven,

Klaus Maier-Hein,

Dagmar Kainmueller,

Christoph Karg,

Carsten T. Lüth; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guarino_2026_CVPR, author = {Guarino, Vanessa Emanuela and Winklmayr, Claudia and Franzen, Jannik and Rumberger, Josef Lorenz and Pfeuffer, Manuel and Greven, Sonja and Maier-Hein, Klaus and Kainmueller, Dagmar and Karg, Christoph and L{\"u}th, Carsten T.}, title = {Better than Average: Spatially-Aware Aggregation of Segmentation Uncertainty Improves Downstream Performance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13145-13156} }
Real-Time Multimodal Fingertip Contact Detection via Depth and Motion Fusion for Vision-Based Human-Computer Interaction: Mukhiddin Toshpulatov,

Wookey Lee,

Suan Lee,

Geehyuk Lee; [pdf] [supp]
[bibtex]
@InProceedings{Toshpulatov_2026_CVPR, author = {Toshpulatov, Mukhiddin and Lee, Wookey and Lee, Suan and Lee, Geehyuk}, title = {Real-Time Multimodal Fingertip Contact Detection via Depth and Motion Fusion for Vision-Based Human-Computer Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1819-1828} }
Weight Space Representation Learning via Neural Field Adaptation: Zhuoqian Yang,

Mathieu Salzmann,

Sabine Süsstrunk; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Zhuoqian and Salzmann, Mathieu and S{\"u}sstrunk, Sabine}, title = {Weight Space Representation Learning via Neural Field Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17734-17743} }
Bringing Your Portrait to 3D Presence: Jiawei Zhang,

Lei Chu,

Jiahao Li,

Zhenyu Zang,

Chong Li,

Xiao Li,

Xun Cao,

Hao Zhu,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jiawei and Chu, Lei and Li, Jiahao and Zang, Zhenyu and Li, Chong and Li, Xiao and Cao, Xun and Zhu, Hao and Lu, Yan}, title = {Bringing Your Portrait to 3D Presence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28468-28480} }
Learning Straight Flows: Variational Flow Matching for Efficient Generation: Chenrui Ma,

Xi Xiao,

Tianyang Wang,

Xiao Wang,

Yanning Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR, author = {Ma, Chenrui and Xiao, Xi and Wang, Tianyang and Wang, Xiao and Shen, Yanning}, title = {Learning Straight Flows: Variational Flow Matching for Efficient Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38154-38164} }
OpenMMReasoner: Pushing the Frontiers in Multimodal Reasoning with an Open and General Recipe: Kaichen Zhang,

Keming Wu,

Zuhao Yang,

Bo Li,

Kairui Hu,

Bin Wang,

Xingxuan Li,

Lidong Bing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Kaichen and Wu, Keming and Yang, Zuhao and Li, Bo and Hu, Kairui and Wang, Bin and Li, Xingxuan and Bing, Lidong}, title = {OpenMMReasoner: Pushing the Frontiers in Multimodal Reasoning with an Open and General Recipe}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19276-19286} }
MMGait: Towards Multi-Modal Gait Recognition: Chenye Wang,

Qingyuan Cai,

Saihui Hou,

Aoqi Li,

Yongzhen Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Chenye and Cai, Qingyuan and Hou, Saihui and Li, Aoqi and Huang, Yongzhen}, title = {MMGait: Towards Multi-Modal Gait Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1726-1736} }
Differentiable Adaptive 4D Structured Illumination for Joint Capture of Shape and Reflectance: Huakeng Ding,

Yaowen Chen,

Kun Zhou,

Hongzhi Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2026_CVPR, author = {Ding, Huakeng and Chen, Yaowen and Zhou, Kun and Wu, Hongzhi}, title = {Differentiable Adaptive 4D Structured Illumination for Joint Capture of Shape and Reflectance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12448-12457} }
DCoAR: Deep Concept Injection into Unified Autoregressive Models for Personalized Text-to-Image Generation: Fangtai Wu,

Mushui Liu,

Weijie He,

Zhao Wang,

Yunlong Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Fangtai and Liu, Mushui and He, Weijie and Wang, Zhao and Yu, Yunlong}, title = {DCoAR: Deep Concept Injection into Unified Autoregressive Models for Personalized Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29265-29274} }
Efficient Real-Time Raw-to-Raw Denoising for Extreme Low-Light Ultra HD Video on Mobile Devices: Charantej Pochimireddy,

Subhasmita Sahoo,

Apoorva Verma,

Palavalli Shyam,

Swapnil Malviya,

Sarvesh Sarvesh,

Raj Gadde; [pdf] [supp]
[bibtex]
@InProceedings{Pochimireddy_2026_CVPR, author = {Pochimireddy, Charantej and Sahoo, Subhasmita and Verma, Apoorva and Shyam, Palavalli and Malviya, Swapnil and Sarvesh, Sarvesh and Gadde, Raj}, title = {Efficient Real-Time Raw-to-Raw Denoising for Extreme Low-Light Ultra HD Video on Mobile Devices}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1376-1385} }
OneSparse: A Unified Framework for Sparse Activation Layers in Vision Models: Xingkui Zhu,

Dingkang Liang,

Cheng Chen,

Daoxin Zhang,

lv Hanxiang,

Zhe Xu,

Yao Hu,

Xiang Bai; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR, author = {Zhu, Xingkui and Liang, Dingkang and Chen, Cheng and Zhang, Daoxin and Hanxiang, lv and Xu, Zhe and Hu, Yao and Bai, Xiang}, title = {OneSparse: A Unified Framework for Sparse Activation Layers in Vision Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12084-12094} }
Dynamic Visual SLAM using a General 3D Prior: Xingguang Zhong,

Liren Jin,

Marija Popovic,

Jens Behley,

Cyrill Stachniss; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2026_CVPR, author = {Zhong, Xingguang and Jin, Liren and Popovic, Marija and Behley, Jens and Stachniss, Cyrill}, title = {Dynamic Visual SLAM using a General 3D Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21805-21815} }
W2W: Language-Model-Based Trajectory Prediction with Reinforcement Learning: Zirui Xu,

Biao Yang,

Rongrong Ni,

Zhongkai Zhou,

Shaobo Shen; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Zirui and Yang, Biao and Ni, Rongrong and Zhou, Zhongkai and Shen, Shaobo}, title = {W2W: Language-Model-Based Trajectory Prediction with Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23538-23548} }
Thinking with Video: Video Generation as a Promising Multimodal Reasoning Paradigm: Jingqi Tong,

Yurong Mou,

Hangcheng Li,

Mingzhe Li,

Yongzhuo Yang,

Ming Zhang,

Qiguang Chen,

Tianyi Liang,

Xiaomeng Hu,

Yining Zheng,

Xinchi Chen,

Jun Zhao,

Xuanjing Huang,

Xipeng Qiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tong_2026_CVPR, author = {Tong, Jingqi and Mou, Yurong and Li, Hangcheng and Li, Mingzhe and Yang, Yongzhuo and Zhang, Ming and Chen, Qiguang and Liang, Tianyi and Hu, Xiaomeng and Zheng, Yining and Chen, Xinchi and Zhao, Jun and Huang, Xuanjing and Qiu, Xipeng}, title = {Thinking with Video: Video Generation as a Promising Multimodal Reasoning Paradigm}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41121-41129} }
RAID: Retrieval-Augmented Anomaly Detection: Mingxiu Cai,

Zhe Zhang,

Gaochang Wu,

Tianyou Chai,

Xiatian Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR, author = {Cai, Mingxiu and Zhang, Zhe and Wu, Gaochang and Chai, Tianyou and Zhu, Xiatian}, title = {RAID: Retrieval-Augmented Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21367-21378} }
ID-Crafter: VLM-Grounded Online RL for Compositional Multi-Subject Video Generation: Panwang Pan,

Jingjing Zhao,

Yuchen Lin,

Chenguo Lin,

Chenxin Li,

Hengyu Liu,

Tingting Shen,

Yadong Mu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR, author = {Pan, Panwang and Zhao, Jingjing and Lin, Yuchen and Lin, Chenguo and Li, Chenxin and Liu, Hengyu and Shen, Tingting and Mu, Yadong}, title = {ID-Crafter: VLM-Grounded Online RL for Compositional Multi-Subject Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36627-36637} }
Content-Aware Frequency Encoding for Implicit Neural Representations with Fourier-Chebyshev Features: Junbo Ke,

Yangyang Xu,

Chao Wang,

You-Wei Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2026_CVPR, author = {Ke, Junbo and Xu, Yangyang and Wang, Chao and Wen, You-Wei}, title = {Content-Aware Frequency Encoding for Implicit Neural Representations with Fourier-Chebyshev Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3646-3655} }
ShowTable: Unlocking Creative Table Visualization with Collaborative Reflection and Refinement: Zhihang Liu,

Xiaoyi Bao,

Pandeng Li,

Junjie Zhou,

Zhaohe Liao,

Yefei He,

Kaixun Jiang,

Chen-Wei Xie,

Yun Zheng,

Hongtao Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Zhihang and Bao, Xiaoyi and Li, Pandeng and Zhou, Junjie and Liao, Zhaohe and He, Yefei and Jiang, Kaixun and Xie, Chen-Wei and Zheng, Yun and Xie, Hongtao}, title = {ShowTable: Unlocking Creative Table Visualization with Collaborative Reflection and Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24405-24416} }
Order Matters: 3D Shape Generation from Sequential VR Sketches: Yizi Chen,

Sidi Wu,

Tianyi Xiao,

Nina Wiedemann,

Loic Landrieu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Yizi and Wu, Sidi and Xiao, Tianyi and Wiedemann, Nina and Landrieu, Loic}, title = {Order Matters: 3D Shape Generation from Sequential VR Sketches}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34259-34269} }
PinPoint: Evaluation of Composed Image Retrieval with Explicit Negatives, Multi-Image Queries, and Paraphrase Testing: Rohan Mahadev,

Joyce Yuan,

Patrick Poirson,

David Xue,

Hao-Yu Wu,

Dmitry Kislyuk; [pdf] [arXiv]
[bibtex]
@InProceedings{Mahadev_2026_CVPR, author = {Mahadev, Rohan and Yuan, Joyce and Poirson, Patrick and Xue, David and Wu, Hao-Yu and Kislyuk, Dmitry}, title = {PinPoint: Evaluation of Composed Image Retrieval with Explicit Negatives, Multi-Image Queries, and Paraphrase Testing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9742-9751} }
Physical Object Understanding with a Physically Controllable World Model: Rahul Venkatesh,

Klemen Kotar,

Lilian Naing Chen,

Wanhee Lee,

Gia Ancone,

Seungwoo Kim,

Luca Thomas Wheeler,

Jared Watrous,

Honglin Chen,

Daniel Bear,

Stefan Stojanov,

Daniel LK Yamins; [pdf] [supp]
[bibtex]
@InProceedings{Venkatesh_2026_CVPR, author = {Venkatesh, Rahul and Kotar, Klemen and Chen, Lilian Naing and Lee, Wanhee and Ancone, Gia and Kim, Seungwoo and Wheeler, Luca Thomas and Watrous, Jared and Chen, Honglin and Bear, Daniel and Stojanov, Stefan and Yamins, Daniel LK}, title = {Physical Object Understanding with a Physically Controllable World Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2593-2602} }
Hybrid Agents for Image Restoration: Bingchen Li,

Xin Li,

Yiting Lu,

Zhibo Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Bingchen and Li, Xin and Lu, Yiting and Chen, Zhibo}, title = {Hybrid Agents for Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22636-22647} }
Same or Not? Enhancing Visual Perception in Vision-Language Models: Damiano Marsili,

Aditya Mehta,

Ryan Y. Lin,

Georgia Gkioxari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Marsili_2026_CVPR, author = {Marsili, Damiano and Mehta, Aditya and Lin, Ryan Y. and Gkioxari, Georgia}, title = {Same or Not? Enhancing Visual Perception in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17303-17315} }
Turbo-GS: Accelerating 3D Gaussian Fitting for High-Resolution Radiance Fields: Ankit Dhiman,

Tao Lu,

R Srinath,

Emre Arslan,

Angela Xing,

Yuanbo Xiangli,

Venkatesh Babu Radhakrishnan,

Srinath Sridhar; [pdf] [supp]
[bibtex]
@InProceedings{Dhiman_2026_CVPR, author = {Dhiman, Ankit and Lu, Tao and Srinath, R and Arslan, Emre and Xing, Angela and Xiangli, Yuanbo and Radhakrishnan, Venkatesh Babu and Sridhar, Srinath}, title = {Turbo-GS: Accelerating 3D Gaussian Fitting for High-Resolution Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15454-15464} }
DF^2-VB: Dual-level Fuzzy Fusion with View-specific Boosting for Multi-view Multi-label Classification: Yuena Lin,

Haichun Cai,

Yi Shan,

Hao Wei,

Yongjian Deng,

Zhen Yang,

Gengyu Lyu; [pdf]
[bibtex]
@InProceedings{Lin_2026_CVPR, author = {Lin, Yuena and Cai, Haichun and Shan, Yi and Wei, Hao and Deng, Yongjian and Yang, Zhen and Lyu, Gengyu}, title = {{DF$^{2}$-VB}: Dual-level Fuzzy Fusion with View-specific Boosting for Multi-view Multi-label Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33859-33868} }
EmbodMocap: In-the-Wild 4D Human-Scene Reconstruction for Embodied Agents: Wenjia Wang,

Liang Pan,

Huaijin Pi,

Yuke Lou,

Xuqian Ren,

Yifan Wu,

Zhouyingcheng Liao,

Lei Yang,

Rishabh Dabral,

Christian Theobalt,

Taku Komura; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Wenjia and Pan, Liang and Pi, Huaijin and Lou, Yuke and Ren, Xuqian and Wu, Yifan and Liao, Zhouyingcheng and Yang, Lei and Dabral, Rishabh and Theobalt, Christian and Komura, Taku}, title = {EmbodMocap: In-the-Wild 4D Human-Scene Reconstruction for Embodied Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28424-28434} }
Learning Effective Sign Features without Text for Gloss-free Sign Language Translation: Shiwei Gan,

Xiao Liu,

Yafeng Yin,

Nan Liu,

Kuizhuang Liu,

Desibieer Tuerdaken,

Zhiwei Jiang,

Lei Xie,

Sanglu Lu,

Hongkai Wen; [pdf] [supp]
[bibtex]
@InProceedings{Gan_2026_CVPR,
  author    = {Gan, Shiwei and Liu, Xiao and Yin, Yafeng and Liu, Nan and Liu, Kuizhuang and Tuerdaken, Desibieer and Jiang, Zhiwei and Xie, Lei and Lu, Sanglu and Wen, Hongkai},
  title     = {Learning Effective Sign Features without Text for Gloss-free Sign Language Translation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9827--9836},
}
Reconstruction-Guided Slot Curriculum: Addressing Object Over-Fragmentation in Video Object-Centric Learning: WonJun Moon,

Hyun Seok Seong,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moon_2026_CVPR,
  author    = {Moon, WonJun and Seong, Hyun Seok and Heo, Jae-Pil},
  title     = {Reconstruction-Guided Slot Curriculum: Addressing Object Over-Fragmentation in Video Object-Centric Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25001--25010},
}
GeoSURGE: Geo-localization using Semantic Fusion with Hierarchy of Geographic Embeddings: Angel Daruna,

Nicholas Meegan,

Han-Pang Chiu,

Supun Samarasekera,

Rakesh Kumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Daruna_2026_CVPR,
  author    = {Daruna, Angel and Meegan, Nicholas and Chiu, Han-Pang and Samarasekera, Supun and Kumar, Rakesh},
  title     = {{GeoSURGE}: Geo-localization using Semantic Fusion with Hierarchy of Geographic Embeddings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41396--41405},
}
Towards Foundation Models for 3D Scene Understanding: Instance-Aware Self-Supervised Learning for Point Clouds: Bin Yang,

Mohamed Abdelsamad,

Miao Zhang,

Alexandru Paul Condurache; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Bin and Abdelsamad, Mohamed and Zhang, Miao and Condurache, Alexandru Paul},
  title     = {Towards Foundation Models for {3D} Scene Understanding: Instance-Aware Self-Supervised Learning for Point Clouds},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2938--2947},
}
Building a Precise Video Language with Human-AI Oversight: Zhiqiu Lin,

Siyuan Cen,

Chancharik Mitra,

Isaac Li,

Yuhan Huang,

Yu Tong Tiffany Ling,

Hewei Wang,

Irene Pi,

Shihang Zhu,

Yili Han,

Yilun Du,

Deva Ramanan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Zhiqiu and Cen, Siyuan and Mitra, Chancharik and Li, Isaac and Huang, Yuhan and Ling, Yu Tong Tiffany and Wang, Hewei and Pi, Irene and Zhu, Shihang and Han, Yili and Du, Yilun and Ramanan, Deva},
  title     = {Building a Precise Video Language with Human-{AI} Oversight},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11334--11345},
}
Clair Obscur: an Illumination-Aware Method for Real-World Image Vectorization: Xingyue Lin,

Shuai Peng,

Xiangyu Xie,

Jianhua Zhu,

Yuxuan Zhou,

Liangcai Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Xingyue and Peng, Shuai and Xie, Xiangyu and Zhu, Jianhua and Zhou, Yuxuan and Gao, Liangcai},
  title     = {{Clair Obscur}: an Illumination-Aware Method for Real-World Image Vectorization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9161--9170},
}
Virtual Full-stack Scanning of Brain MRI via Imputing Any Quantised Code: Yicheng Wu,

Tao Song,

Zhonghua Wu,

Jin Ye,

Zongyuan Ge,

Wenjia Bai,

Zhaolin Chen,

Jianfei Cai; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yicheng and Song, Tao and Wu, Zhonghua and Ye, Jin and Ge, Zongyuan and Bai, Wenjia and Chen, Zhaolin and Cai, Jianfei},
  title     = {Virtual Full-stack Scanning of Brain {MRI} via Imputing Any Quantised Code},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21026--21035},
}
MoD-DPO: Towards Mitigating Cross-modal Hallucinations in Omni LLMs using Modality Decoupled Preference Optimization: Ashutosh Chaubey,

Jiacheng Pang,

Mohammad Soleymani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chaubey_2026_CVPR,
  author    = {Chaubey, Ashutosh and Pang, Jiacheng and Soleymani, Mohammad},
  title     = {{MoD-DPO}: Towards Mitigating Cross-modal Hallucinations in Omni {LLMs} using Modality Decoupled Preference Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18284--18294},
}
PerpetualWonder: Long-horizon Action-conditioned 4D Scene Generation: Jiahao Zhan,

Zizhang Li,

Hong-Xing Yu,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2026_CVPR,
  author    = {Zhan, Jiahao and Li, Zizhang and Yu, Hong-Xing and Wu, Jiajun},
  title     = {{PerpetualWonder}: Long-horizon Action-conditioned {4D} Scene Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25370--25380},
}
Beyond Strict Pairing: Arbitrarily Paired Training for High-Performance Infrared and Visible Image Fusion: Yanglin Deng,

Tianyang Xu,

Chunyang Cheng,

Hui Li,

Xiaojun Wu,

Josef Kittler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Yanglin and Xu, Tianyang and Cheng, Chunyang and Li, Hui and Wu, Xiaojun and Kittler, Josef},
  title     = {Beyond Strict Pairing: Arbitrarily Paired Training for High-Performance Infrared and Visible Image Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12334--12343},
}
PhotoFramer: Multi-modal Image Composition Instruction: Zhiyuan You,

Ke Wang,

He Zhang,

Xin Cai,

Jinjin Gu,

Tianfan Xue,

Chao Dong,

Zhoutong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2026_CVPR,
  author    = {You, Zhiyuan and Wang, Ke and Zhang, He and Cai, Xin and Gu, Jinjin and Xue, Tianfan and Dong, Chao and Zhang, Zhoutong},
  title     = {{PhotoFramer}: Multi-modal Image Composition Instruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10197--10207},
}
SIF: Semantically In-Distribution Fingerprints for Large Vision-Language Models: Yifei Zhao,

Qian Lou,

Mengxin Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yifei and Lou, Qian and Zheng, Mengxin},
  title     = {{SIF}: Semantically In-Distribution Fingerprints for Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17399--17408},
}
Design Your Ad: Personalized Advertising Image and Text Generation with Unified Autoregressive Models: Yexing Xu,

Wei Feng,

Shen Zhang,

Haohan Wang,

Yuxin Qin,

Yaoyu Li,

Ao Ma,

Yuhao Luo,

Lu Wang,

Xudong Ren,

Haoran Wang,

Run Ling,

Zheng Zhang,

Jingjing Lv,

Junjie Shen,

Ching Law,

Longguang Wang,

Yulan Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yexing and Feng, Wei and Zhang, Shen and Wang, Haohan and Qin, Yuxin and Li, Yaoyu and Ma, Ao and Luo, Yuhao and Wang, Lu and Ren, Xudong and Wang, Haoran and Ling, Run and Zhang, Zheng and Lv, Jingjing and Shen, Junjie and Law, Ching and Wang, Longguang and Guo, Yulan},
  title     = {Design Your Ad: Personalized Advertising Image and Text Generation with Unified Autoregressive Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {472--483},
}
Taxonomy-Aware Representation Alignment for Hierarchical Visual Recognition with Large Multimodal Models: Hulingxiao He,

Zhi Tan,

Yuxin Peng; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Hulingxiao and Tan, Zhi and Peng, Yuxin},
  title     = {Taxonomy-Aware Representation Alignment for Hierarchical Visual Recognition with Large Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31124--31134},
}
WorldLens: Full-Spectrum Evaluations of Driving World Models in Real World: Ao Liang,

Lingdong Kong,

Tianyi Yan,

Hongsi Liu,

Yu Yang,

Ziqi Huang,

Wei Yin,

Jialong Zuo,

Yixuan Hu,

Dekai Zhu,

Dongyue Lu,

Youquan Liu,

Guangfeng Jiang,

Linfeng Li,

Xiangtai Li,

Long Zhuo,

Lai Xing Ng,

Benoit R. Cottereau,

Changxin Gao,

Liang Pan,

Wei Tsang Ooi,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Ao and Kong, Lingdong and Yan, Tianyi and Liu, Hongsi and Yang, Yu and Huang, Ziqi and Yin, Wei and Zuo, Jialong and Hu, Yixuan and Zhu, Dekai and Lu, Dongyue and Liu, Youquan and Jiang, Guangfeng and Li, Linfeng and Li, Xiangtai and Zhuo, Long and Ng, Lai Xing and Cottereau, Benoit R. and Gao, Changxin and Pan, Liang and Ooi, Wei Tsang and Liu, Ziwei},
  title     = {{WorldLens}: Full-Spectrum Evaluations of Driving World Models in Real World},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36385--36399},
}
DiffGraph: An Automated Agent-driven Model Merging Framework for In-the-Wild Text-to-Image Generation: Zhuoling Li,

Hossein Rahmani,

Jiarui Zhang,

Yu Xue,

Majid Mirmehdi,

Jason Kuen,

Jiuxiang Gu,

Jun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhuoling and Rahmani, Hossein and Zhang, Jiarui and Xue, Yu and Mirmehdi, Majid and Kuen, Jason and Gu, Jiuxiang and Liu, Jun},
  title     = {{DiffGraph}: An Automated Agent-driven Model Merging Framework for In-the-Wild Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36713--36723},
}
ReFTA: Breaking the Weight Reconstruction Bottleneck in Tensorized Parameter-Efficient Fine-Tuning: Jingjing Zheng,

Anda Tang,

Qiangqiang Mao,

Zhouchen Lin,

Yankai Cao; [pdf]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Jingjing and Tang, Anda and Mao, Qiangqiang and Lin, Zhouchen and Cao, Yankai},
  title     = {{ReFTA}: Breaking the Weight Reconstruction Bottleneck in Tensorized Parameter-Efficient Fine-Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26369--26378},
}
From Exploration to Exploitation: A Two-Stage Entropy RLVR Approach for Noise-Tolerant MLLM Training: Donglai Xu,

Hongzheng Yang,

Yuzhi Zhao,

Pingping Zhang,

Jinpeng Chen,

Wenao Ma,

Zhijian Hou,

Mengyang Wu,

Xiaolei Li,

Senkang Hu,

Ziyi Guan,

Jason Chun Lok Li,

Lai-Man Po; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Donglai and Yang, Hongzheng and Zhao, Yuzhi and Zhang, Pingping and Chen, Jinpeng and Ma, Wenao and Hou, Zhijian and Wu, Mengyang and Li, Xiaolei and Hu, Senkang and Guan, Ziyi and Li, Jason Chun Lok and Po, Lai-Man},
  title     = {From Exploration to Exploitation: A Two-Stage Entropy {RLVR} Approach for Noise-Tolerant {MLLM} Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17776--17786},
}
Tracking-Guided 4D Generation: Foundation-Tracker Motion Priors for 3D Model Animation: Su Sun,

Cheng Zhao,

Himangi Mittal,

Gaurav Mittal,

Rohith Kukkala,

Yingjie Victor Chen,

Mei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Su and Zhao, Cheng and Mittal, Himangi and Mittal, Gaurav and Kukkala, Rohith and Chen, Yingjie Victor and Chen, Mei},
  title     = {Tracking-Guided {4D} Generation: Foundation-Tracker Motion Priors for {3D} Model Animation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40992--41001},
}
Revisiting Monocular SLAM with Spatio-Temporal Scene Modeling: Valter Piedade,

Lalit Manam,

Masashi Yamazaki,

Pedro Miraldo; [pdf] [supp]
[bibtex]
@InProceedings{Piedade_2026_CVPR,
  author    = {Piedade, Valter and Manam, Lalit and Yamazaki, Masashi and Miraldo, Pedro},
  title     = {Revisiting Monocular {SLAM} with Spatio-Temporal Scene Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28925--28936},
}
RobustVisRAG: Causality-Aware Vision-Based Retrieval-Augmented Generation under Visual Degradations: I-Hsiang Chen,

Yu-Wei Liu,

Tse-Yu Wu,

Yu-Chien Chiang,

Jen-Chieh Yang,

Wei-Ting Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, I-Hsiang and Liu, Yu-Wei and Wu, Tse-Yu and Chiang, Yu-Chien and Yang, Jen-Chieh and Chen, Wei-Ting},
  title     = {{RobustVisRAG}: Causality-Aware Vision-Based Retrieval-Augmented Generation under Visual Degradations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40802--40811},
}
Dual-Level Hypergraph Generation for Addressing Feature Scarcity in Whole-Slide Image Classification: Shuilian Yao,

Qi Jia,

Yu Liu,

Pengshuo Zhang,

Lili Sun,

Weimin Wang,

Yanmei Zhu,

Bo Zhang,

Xin Fan; [pdf]
[bibtex]
@InProceedings{Yao_2026_CVPR,
  author    = {Yao, Shuilian and Jia, Qi and Liu, Yu and Zhang, Pengshuo and Sun, Lili and Wang, Weimin and Zhu, Yanmei and Zhang, Bo and Fan, Xin},
  title     = {Dual-Level Hypergraph Generation for Addressing Feature Scarcity in Whole-Slide Image Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28328--28337},
}
Edit2Perceive: Image Editing Diffusion Models Are Strong Dense Perceivers: Yiqing Shi,

Yiren Song,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Yiqing and Song, Yiren and Shou, Mike Zheng},
  title     = {{Edit2Perceive}: Image Editing Diffusion Models Are Strong Dense Perceivers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43568--43577},
}
Will Multimodal Models Be Dazzled by Multi-Image Visual Puzzles?: Zhi Zhu,

YaoQi Fan,

Zhe Chen,

Yue Cao,

Yangzhou Liu,

Tong Lu; [pdf]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Zhi and Fan, YaoQi and Chen, Zhe and Cao, Yue and Liu, Yangzhou and Lu, Tong},
  title     = {Will Multimodal Models Be Dazzled by Multi-Image Visual Puzzles?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11943--11953},
}
Unpaired Image Deraining Using Reward-Guided Self-Reinforcement Strategy: Yinghao Chen,

Yeying Jin,

Xiang Chen,

Yanyan Wei,

Ziyang Yan,

Yaowen Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yinghao and Jin, Yeying and Chen, Xiang and Wei, Yanyan and Yan, Ziyang and Fu, Yaowen},
  title     = {Unpaired Image Deraining Using Reward-Guided Self-Reinforcement Strategy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1342--1354},
}
Models as Lego Builders: Assembling Malice from Benign Blocks via Semantic Blueprints: Chenxi Li,

Xianggan Liu,

Dake Shen,

Yaosong Du,

Zhibo Yao,

Hao Jiang,

Linyi Jiang,

Chengwei Cao,

Jingzhe Zhang,

RanYi Peng,

Peiling Bai,

Xiande Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Chenxi and Liu, Xianggan and Shen, Dake and Du, Yaosong and Yao, Zhibo and Jiang, Hao and Jiang, Linyi and Cao, Chengwei and Zhang, Jingzhe and Peng, RanYi and Bai, Peiling and Huang, Xiande},
  title     = {Models as {Lego} Builders: Assembling Malice from Benign Blocks via Semantic Blueprints},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1533--1542},
}
GroundVTS: Visual Token Sampling in Multimodal Large Language Models for Video Temporal Grounding: Rong Fan,

Kaiyan Xiao,

Minghao Zhu,

Liuyi Wang,

Kai Dai,

Zhao Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Rong and Xiao, Kaiyan and Zhu, Minghao and Wang, Liuyi and Dai, Kai and Yang, Zhao},
  title     = {{GroundVTS}: Visual Token Sampling in Multimodal Large Language Models for Video Temporal Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10408--10418},
}
LAM: Language Articulated Object Modelers: Yipeng Gao,

Yunhao Ge,

Peilin Cai,

Daniel Seita,

Laurent Itti; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yipeng and Ge, Yunhao and Cai, Peilin and Seita, Daniel and Itti, Laurent},
  title     = {{LAM}: Language Articulated Object Modelers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16010--16020},
}
The Power of Prior: Training-Free Open-Vocabulary Semantic Segmentation with LLaVA: Bingfeng Zhang,

Siyue Yu,

Hui Li,

Jiahua Lin,

Wenwu Wang,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Bingfeng and Yu, Siyue and Li, Hui and Lin, Jiahua and Wang, Wenwu and Xiao, Jimin},
  title     = {The Power of Prior: Training-Free Open-Vocabulary Semantic Segmentation with {LLaVA}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6336--6345},
}
SWIFT: Sliding Window Reconstruction for Few-Shot Training-Free Generated Video Attribution: Chao Wang,

Zijin Yang,

Yaofei Wang,

Yuang Qi,

Weiming Zhang,

Nenghai Yu,

Kejiang Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chao and Yang, Zijin and Wang, Yaofei and Qi, Yuang and Zhang, Weiming and Yu, Nenghai and Chen, Kejiang},
  title     = {{SWIFT}: Sliding Window Reconstruction for Few-Shot Training-Free Generated Video Attribution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31725--31734},
}
Vanast: Virtual Try-On with Human Image Animation via Synthetic Triplet Supervision: Hyunsoo Cha,

Wonjung Woo,

Byungjun Kim,

Hanbyul Joo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cha_2026_CVPR,
  author    = {Cha, Hyunsoo and Woo, Wonjung and Kim, Byungjun and Joo, Hanbyul},
  title     = {Vanast: Virtual Try-On with Human Image Animation via Synthetic Triplet Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3988--3997},
}
TAPE: Task-Adaptive Prototype Evolution in Audio-Language Models for Fully Few-shot Class-incremental Audio Classification: Yunlong Gao,

Wenxin Liang,

Guanglu Wang,

Senqi Guan,

Linlin Zong,

Dongyu Zhang,

Xinyue Liu; [pdf]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yunlong and Liang, Wenxin and Wang, Guanglu and Guan, Senqi and Zong, Linlin and Zhang, Dongyu and Liu, Xinyue},
  title     = {{TAPE}: Task-Adaptive Prototype Evolution in Audio-Language Models for Fully Few-shot Class-incremental Audio Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19570--19579},
}
Mapping Networks: Lord Sen,

Shyamapada Mukherjee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sen_2026_CVPR,
  author    = {Sen, Lord and Mukherjee, Shyamapada},
  title     = {Mapping Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36215--36223},
}
Image Guides Images: Consistent Video Amodal Completion with Rectified In-Context Exemplar Guidance: Xiaoyu Kong,

Ketong Ren,

Dongyu She,

Weiming Dong,

Miao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Xiaoyu and Ren, Ketong and She, Dongyu and Dong, Weiming and Wang, Miao},
  title     = {Image Guides Images: Consistent Video Amodal Completion with Rectified In-Context Exemplar Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8257--8266},
}
HG-Lane: High-Fidelity Generation of Lane Scenes under Adverse Weather and Lighting Conditions without Re-annotation: Daichao Zhao,

Qiupu Chen,

Feng He,

Xin Ning,

Qiankun Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Daichao and Chen, Qiupu and He, Feng and Ning, Xin and Li, Qiankun},
  title     = {{HG-Lane}: High-Fidelity Generation of Lane Scenes under Adverse Weather and Lighting Conditions without Re-annotation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8439--8448},
}
IAG: Input-aware Backdoor Attack on VLM-based Visual Grounding: Junxian Li,

Beining Xu,

Simin Chen,

Jiatong Li,

Jingdi Lei,

Haodong Zhao,

Di Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Junxian and Xu, Beining and Chen, Simin and Li, Jiatong and Lei, Jingdi and Zhao, Haodong and Zhang, Di},
  title     = {{IAG}: Input-aware Backdoor Attack on {VLM}-based Visual Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27872--27883},
}
ParticleGS: Learning Neural Gaussian Particle Dynamics from Videos for Prior-free Physical Motion Extrapolation: Jinsheng Quan,

Qiaowei Miao,

Yichao Xu,

Zizhuo Lin,

Ying Li,

Wei Yang,

Zhihui Li,

Yawei Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Quan_2026_CVPR,
  author    = {Quan, Jinsheng and Miao, Qiaowei and Xu, Yichao and Lin, Zizhuo and Li, Ying and Yang, Wei and Li, Zhihui and Luo, Yawei},
  title     = {{ParticleGS}: Learning Neural {Gaussian} Particle Dynamics from Videos for Prior-free Physical Motion Extrapolation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8331--8341},
}
CoRoGS: Contextual Gaussian Splatting for Robust Large-Deviation View Synthesis: Xin Ma,

Peng Lu,

Yisong Chen,

Chengwei Pan,

Sheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Xin and Lu, Peng and Chen, Yisong and Pan, Chengwei and Li, Sheng},
  title     = {{CoRoGS}: Contextual {Gaussian} Splatting for Robust Large-Deviation View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8288--8297},
}
MotionMaster: Generalizable Text-Driven Motion Generation and Editing: Nan Jiang,

Yunhao Li,

Lexi Pang,

Zimo He,

Siyuan Huang,

Yixin Zhu; [pdf]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Nan and Li, Yunhao and Pang, Lexi and He, Zimo and Huang, Siyuan and Zhu, Yixin},
  title     = {{MotionMaster}: Generalizable Text-Driven Motion Generation and Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30629--30639},
}
ReAG: Reasoning-Augmented Generation for Knowledge-based Visual Question Answering: Alberto Compagnoni,

Marco Morini,

Sara Sarto,

Federico Cocchi,

Davide Caffagni,

Marcella Cornia,

Lorenzo Baraldi,

Rita Cucchiara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Compagnoni_2026_CVPR,
  author    = {Compagnoni, Alberto and Morini, Marco and Sarto, Sara and Cocchi, Federico and Caffagni, Davide and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita},
  title     = {{ReAG}: Reasoning-Augmented Generation for Knowledge-based Visual Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11901--11911},
}
LiDeRe: A Lightweight Readout for Fast and Data-Efficient Dense Prediction: Timo Lüddecke,

Jan Frederik Meier,

Jan van Delden,

Alexander Ecker; [pdf] [supp]
[bibtex]
@InProceedings{Luddecke_2026_CVPR,
  author    = {L{\"u}ddecke, Timo and Meier, Jan Frederik and van Delden, Jan and Ecker, Alexander},
  title     = {{LiDeRe}: A Lightweight Readout for Fast and Data-Efficient Dense Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2959--2971},
}
LATA: Laplacian-Assisted Transductive Adaptation for Conformal Uncertainty in Medical VLMs: Behzad Bozorgtabar,

Dwarikanath Mahapatra,

Sudipta Roy,

Muzammal Naseer,

Imran Razzak,

Zongyuan Ge; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bozorgtabar_2026_CVPR,
  author    = {Bozorgtabar, Behzad and Mahapatra, Dwarikanath and Roy, Sudipta and Naseer, Muzammal and Razzak, Imran and Ge, Zongyuan},
  title     = {{LATA}: {Laplacian}-Assisted Transductive Adaptation for Conformal Uncertainty in Medical {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36311--36320},
}
DFD-HR: Generalizable Deepfake Detection via Hierarchical Routing Learning: Jiamu Sun,

Zhiyuan Yan,

Ke-Yue Zhang,

Taiping Yao,

Shouhong Ding; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Jiamu and Yan, Zhiyuan and Zhang, Ke-Yue and Yao, Taiping and Ding, Shouhong},
  title     = {{DFD-HR}: Generalizable Deepfake Detection via Hierarchical Routing Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13984--13995},
}
Soul: Breathe Life into Digital Human for High-fidelity Long-term Multimodal Animation: Jiangning Zhang,

Junwei Zhu,

Zhenye Gan,

Donghao Luo,

Chuming Lin,

FeiFan Xu,

Xu Peng,

Jianlong Hu,

Yuansen Liu,

Yijia Hong,

Weijian Cao,

Han Feng,

Xu Chen,

Chencan Fu,

Keke He,

Xiaobin Hu,

Chengjie Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jiangning and Zhu, Junwei and Gan, Zhenye and Luo, Donghao and Lin, Chuming and Xu, FeiFan and Peng, Xu and Hu, Jianlong and Liu, Yuansen and Hong, Yijia and Cao, Weijian and Feng, Han and Chen, Xu and Fu, Chencan and He, Keke and Hu, Xiaobin and Wang, Chengjie},
  title     = {Soul: Breathe Life into Digital Human for High-fidelity Long-term Multimodal Animation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3953--3964},
}
Decouple to Generalize: Context-First Self-Evolving Learning for Data-Scarce Vision-Language Reasoning: Tingyu Li,

Zheng Sun,

Jingxuan Wei,

Conghui He,

Lijun Wu,

Cheng Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Tingyu and Sun, Zheng and Wei, Jingxuan and He, Conghui and Wu, Lijun and Tan, Cheng},
  title     = {Decouple to Generalize: Context-First Self-Evolving Learning for Data-Scarce Vision-Language Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29357--29366},
}
AceTone: Bridging Words and Colors for Conditional Image Grading: Tianren Ma,

Mingxiang Liao,

Xijin Zhang,

Qixiang Ye; [pdf] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Tianren and Liao, Mingxiang and Zhang, Xijin and Ye, Qixiang},
  title     = {{AceTone}: Bridging Words and Colors for Conditional Image Grading},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25851--25860},
}
MORE-STEM: Long-Short MemOry REcall and Spatio-TEmporal Consistency Model for Query-Driven 3D/4D Point Cloud Segmentation: Chade Li,

Haida Feng,

Pengju Zhang,

Yihong Wu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Chade and Feng, Haida and Zhang, Pengju and Wu, Yihong},
  title     = {{MORE-STEM}: Long-Short {MemOry} {REcall} and {Spatio-TEmporal} Consistency Model for Query-Driven {3D/4D} Point Cloud Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31462--31471},
}
Homaloidal parametrization for detecting critical two-view configurations: Rakshith Madhavan,

Matteo Forlivesi,

Marina Bertolini,

Cristina Turrini,

Federica Arrigoni,

Luca Magri; [pdf] [supp]
[bibtex]
@InProceedings{Madhavan_2026_CVPR,
  author    = {Madhavan, Rakshith and Forlivesi, Matteo and Bertolini, Marina and Turrini, Cristina and Arrigoni, Federica and Magri, Luca},
  title     = {Homaloidal parametrization for detecting critical two-view configurations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26432--26440},
}
BiEvLight: Bi-level Learning of Task-Aware Event Refinement for Low-Light Image Enhancement: Zishu Yao,

Xiang-Xiang Su,

Shengning Zhou,

Guang-Yong Chen,

Guodong Fan,

Xing Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2026_CVPR,
  author    = {Yao, Zishu and Su, Xiang-Xiang and Zhou, Shengning and Chen, Guang-Yong and Fan, Guodong and Chen, Xing},
  title     = {{BiEvLight}: Bi-level Learning of Task-Aware Event Refinement for Low-Light Image Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41541--41550},
}
Data Leakage Detection and De-duplication in Large Scale Geospatial Image Datasets: Yeshwanth Kumar Adimoolam,

Charalambos Poullis,

Melinos Averkiou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Adimoolam_2026_CVPR,
  author    = {Adimoolam, Yeshwanth Kumar and Poullis, Charalambos and Averkiou, Melinos},
  title     = {Data Leakage Detection and De-duplication in Large Scale Geospatial Image Datasets},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {72--81},
}
ConsistCompose: Unified Multimodal Layout Control for Image Composition: Xuanke Shi,

Boxuan Li,

Xiaoyang Han,

Zhongang Cai,

Lei Yang,

Quan Wang,

Dahua Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Xuanke and Li, Boxuan and Han, Xiaoyang and Cai, Zhongang and Yang, Lei and Wang, Quan and Lin, Dahua},
  title     = {{ConsistCompose}: Unified Multimodal Layout Control for Image Composition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {495--505},
}
Generate, Analyze, and Refine: Training-Free Sound Source Localization via MLLM Meta-Reasoning: Subin Park,

Jung Uk Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Subin and Kim, Jung Uk},
  title     = {Generate, Analyze, and Refine: Training-Free Sound Source Localization via {MLLM} Meta-Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15752--15761},
}
Learnable Motion-Focused Tokenization for Effective and Efficient Video Unsupervised Domain Adaptation: Tzu Ling Liu,

Ian Stavness,

Mrigank Rochan; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Tzu Ling and Stavness, Ian and Rochan, Mrigank},
  title     = {Learnable Motion-Focused Tokenization for Effective and Efficient Video Unsupervised Domain Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31262--31271}
}
RxnCaption: Reformulating Reaction Diagram Parsing as Visual Prompt Guided Captioning: Jiahe Song,

Chuang Wang,

Bowen Jiang,

Yinfan Wang,

Hao Zheng,

Xingjian Wei,

Chengjin Liu,

Rui Nie,

Junyuan Gao,

Jiaxing Sun,

Yubin Wang,

Lijun Wu,

Zhenhua Huang,

Jiang Wu,

Qian Yu,

Conghui He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Jiahe and Wang, Chuang and Jiang, Bowen and Wang, Yinfan and Zheng, Hao and Wei, Xingjian and Liu, Chengjin and Nie, Rui and Gao, Junyuan and Sun, Jiaxing and Wang, Yubin and Wu, Lijun and Huang, Zhenhua and Wu, Jiang and Yu, Qian and He, Conghui},
  title     = {{RxnCaption}: Reformulating Reaction Diagram Parsing as Visual Prompt Guided Captioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30864--30873}
}
Multi-Patch Global-to-Local Transformer Architecture For Efficient Flow Matching and Diffusion Model: Quan Dao,

Dimitris Metaxas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dao_2026_CVPR,
  author    = {Dao, Quan and Metaxas, Dimitris},
  title     = {Multi-Patch Global-to-Local Transformer Architecture For Efficient Flow Matching and Diffusion Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33000--33011}
}
iSplat: Iterative Learning for Fine-Grained Gaussian Splatting: Haifeng Wu,

Wei Long,

Shuhang Gu,

Lixin Duan,

Wen Li; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Haifeng and Long, Wei and Gu, Shuhang and Duan, Lixin and Li, Wen},
  title     = {{iSplat}: Iterative Learning for Fine-Grained {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11746--11755}
}
Gyro-based Deep Video Deblurring: Jaesung Rim,

Woohyeok Kim,

Haeyun Lee,

Heemin Yang,

Ke Wang,

Sunghyun Cho; [pdf] [supp]
[bibtex]
@InProceedings{Rim_2026_CVPR,
  author    = {Rim, Jaesung and Kim, Woohyeok and Lee, Haeyun and Yang, Heemin and Wang, Ke and Cho, Sunghyun},
  title     = {Gyro-based Deep Video Deblurring},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8364--8374}
}
Detect Anything via Next Point Prediction: Qing Jiang,

Junan Huo,

Xingyu Chen,

Yuda Xiong,

Zhaoyang Zeng,

Yihao Chen,

Tianhe Ren,

Junzhi Yu,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Qing and Huo, Junan and Chen, Xingyu and Xiong, Yuda and Zeng, Zhaoyang and Chen, Yihao and Ren, Tianhe and Yu, Junzhi and Zhang, Lei},
  title     = {Detect Anything via Next Point Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25472--25483}
}
VidEoMT: Your ViT is Secretly Also a Video Segmentation Model: Narges Norouzi,

Idil Esen Zulfikar,

Niccolò Cavagnero,

Tommie Kerssies,

Bastian Leibe,

Gijs Dubbelman,

Daan de Geus; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Norouzi_2026_CVPR,
  author    = {Norouzi, Narges and Zulfikar, Idil Esen and Cavagnero, Niccol{\`o} and Kerssies, Tommie and Leibe, Bastian and Dubbelman, Gijs and de Geus, Daan},
  title     = {{VidEoMT}: Your {ViT} is Secretly Also a Video Segmentation Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35177--35186}
}
PSDesigner: Automated Graphic Design with a Human-Like Creative Workflow: Xincheng Shuai,

Song Tang,

Yutong Huang,

Henghui Ding,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shuai_2026_CVPR,
  author    = {Shuai, Xincheng and Tang, Song and Huang, Yutong and Ding, Henghui and Tao, Dacheng},
  title     = {{PSDesigner}: Automated Graphic Design with a Human-Like Creative Workflow},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10165--10175}
}
VAST: Video Ability-Stratified Taxonomy for Data-Efficient Video Reasoning: Zhongan Wang,

Xiaoyu Wen,

Lingxiao Du,

Kun Li,

Zhiliang Wu,

Xingcheng Xu,

Qiaosheng Zhang,

Chaochao Lu,

Hehe Fan; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhongan and Wen, Xiaoyu and Du, Lingxiao and Li, Kun and Wu, Zhiliang and Xu, Xingcheng and Zhang, Qiaosheng and Lu, Chaochao and Fan, Hehe},
  title     = {{VAST}: Video Ability-Stratified Taxonomy for Data-Efficient Video Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18576--18586}
}
HQC-NBV: A Hybrid Quantum-Classical View Planning Approach: Xiaotong Yu,

Chang Wen Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Xiaotong and Chen, Chang Wen},
  title     = {{HQC-NBV}: A Hybrid Quantum-Classical View Planning Approach},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35091--35100}
}
TWEO: Transformers Without Extreme Outliers Enables FP8 Training And Quantization For Dummies: Guang Liang,

Jie Shao,

Ningyuan Tang,

Xinyao Liu,

Jianxin Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Guang and Shao, Jie and Tang, Ningyuan and Liu, Xinyao and Wu, Jianxin},
  title     = {{TWEO}: Transformers Without Extreme Outliers Enables {FP8} Training And Quantization For Dummies},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6095--6105}
}
Multi-view Consistent 3D Gaussian Head Avatars 'without' Multi-view Generation: Aviral Chharia,

Fernando De la Torre; [pdf]
[bibtex]
@InProceedings{Chharia_2026_CVPR,
  author    = {Chharia, Aviral and {De la Torre}, Fernando},
  title     = {Multi-view Consistent {3D} {Gaussian} Head Avatars `without' Multi-view Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40163--40174}
}
HandVQA: Diagnosing and Improving Fine-Grained Spatial Reasoning about Hands in Vision-Language Models: MD Khalequzzaman Chowdhury Sayem,

Mubarrat Tajoar Chowdhury,

Yihalem Yimolal Tiruneh,

Muneeb A. Khan,

Muhammad Salman Ali,

Binod Bhattarai,

Seungryul Baek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sayem_2026_CVPR,
  author    = {Sayem, MD Khalequzzaman Chowdhury and Chowdhury, Mubarrat Tajoar and Tiruneh, Yihalem Yimolal and Khan, Muneeb A. and Ali, Muhammad Salman and Bhattarai, Binod and Baek, Seungryul},
  title     = {{HandVQA}: Diagnosing and Improving Fine-Grained Spatial Reasoning about Hands in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2515--2525}
}
FALCON: False-Negative Aware Learning of Contrastive Negatives in Vision-Language Alignment: Myunsoo Kim,

Seongwoong Shim,

Byung-Jun Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Myunsoo and Shim, Seongwoong and Lee, Byung-Jun},
  title     = {{FALCON}: False-Negative Aware Learning of Contrastive Negatives in Vision-Language Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {701--711}
}
D2T2 - Multimodal Automated Planning for Brachytherapy: Lance C. Moore,

Aranyo Mitra,

Ryan Truong,

Karoline Kallis,

Kelly Kisling,

Sandra M. Meyers,

Nuno Vasconcelos; [pdf] [supp]
[bibtex]
@InProceedings{Moore_2026_CVPR,
  author    = {Moore, Lance C. and Mitra, Aranyo and Truong, Ryan and Kallis, Karoline and Kisling, Kelly and Meyers, Sandra M. and Vasconcelos, Nuno},
  title     = {{D2T2} - Multimodal Automated Planning for Brachytherapy},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42670--42680}
}
Toward Early Quality Assessment of Text-to-Image Diffusion Models: Huanlei Guo,

Hongxin Wei,

Bingyi Jing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Huanlei and Wei, Hongxin and Jing, Bingyi},
  title     = {Toward Early Quality Assessment of Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38410--38419}
}
Semantic Foam: Unifying Spatial and Semantic Scene Decomposition: Amr Sharafeldin,

Aryan Mikaeili,

Thomas Walker,

Shrisudhan Govindarajan,

Daniel Rebain,

Kwang Moo Yi,

Andrea Tagliasacchi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sharafeldin_2026_CVPR,
  author    = {Sharafeldin, Amr and Mikaeili, Aryan and Walker, Thomas and Govindarajan, Shrisudhan and Rebain, Daniel and Yi, Kwang Moo and Tagliasacchi, Andrea},
  title     = {Semantic Foam: Unifying Spatial and Semantic Scene Decomposition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29814--29823}
}
MMFace-DiT: A Dual-Stream Diffusion Transformer for High-Fidelity Multimodal Face Generation: Bharath Krishnamurthy,

Ajita Rattani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Krishnamurthy_2026_CVPR,
  author    = {Krishnamurthy, Bharath and Rattani, Ajita},
  title     = {{MMFace-DiT}: A Dual-Stream Diffusion Transformer for High-Fidelity Multimodal Face Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4580--4589}
}
Mantis: A Versatile Vision-Language-Action Model with Disentangled Visual Foresight: Yi Yang,

Xueqi Li,

Yiyang Chen,

Jin Song,

Yihan Wang,

Zipeng Xiao,

Jiadi Su,

You Qiaoben,

Pengfei Liu,

Zhijie Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yi and Li, Xueqi and Chen, Yiyang and Song, Jin and Wang, Yihan and Xiao, Zipeng and Su, Jiadi and Qiaoben, You and Liu, Pengfei and Deng, Zhijie},
  title     = {Mantis: A Versatile Vision-Language-Action Model with Disentangled Visual Foresight},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42505--42515}
}
3D-Aware Implicit Motion Control for View-Adaptive Human Video Generation: Zhixue Fang,

Xu He,

Songlin Tang,

Haoxian Zhang,

Qingfeng Li,

Xiaoqiang Liu,

Pengfei Wan,

Kun Gai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Zhixue and He, Xu and Tang, Songlin and Zhang, Haoxian and Li, Qingfeng and Liu, Xiaoqiang and Wan, Pengfei and Gai, Kun},
  title     = {{3D}-Aware Implicit Motion Control for View-Adaptive Human Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2243--2252}
}
Graph2Eval: Automatic Multimodal Task Generation for Agents via Knowledge Graphs: Yurun Chen,

Xueyu Hu,

Yuhan Liu,

Ziqi Wang,

Zeyi Liao,

Lin Chen,

Feng Wei,

Yuxi Qian,

Bo Zheng,

Keting Yin,

Shengyu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yurun and Hu, Xueyu and Liu, Yuhan and Wang, Ziqi and Liao, Zeyi and Chen, Lin and Wei, Feng and Qian, Yuxi and Zheng, Bo and Yin, Keting and Zhang, Shengyu},
  title     = {{Graph2Eval}: Automatic Multimodal Task Generation for Agents via Knowledge Graphs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {735--744}
}
DetectSCI: Toward Object-Guided ROI Reconstruction for High-Resolution Video Snapshot Compressive Imaging: Xingjian Jiang,

Lishun Wang,

Ping Wang,

Xin Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Xingjian and Wang, Lishun and Wang, Ping and Yuan, Xin},
  title     = {{DetectSCI}: Toward Object-Guided {ROI} Reconstruction for High-Resolution Video Snapshot Compressive Imaging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41720--41729}
}
Harmonious Parameter Adaptation in Continual Visual Instruction Tuning for Safety-Aligned MLLMs: Ziqi Wang,

Chang Che,

Qi Wang,

Hui Ma,

Zenglin Shi,

Cees G. M. Snoek,

Meng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ziqi and Che, Chang and Wang, Qi and Ma, Hui and Shi, Zenglin and Snoek, Cees G. M. and Wang, Meng},
  title     = {Harmonious Parameter Adaptation in Continual Visual Instruction Tuning for Safety-Aligned {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17282--17291}
}
Explaining CLIP Zero-shot Predictions Through Concepts: Onat Ozdemir,

Anders Christensen,

Stephan Alaniz,

Zeynep Akata,

Emre Akbas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ozdemir_2026_CVPR,
  author    = {Ozdemir, Onat and Christensen, Anders and Alaniz, Stephan and Akata, Zeynep and Akbas, Emre},
  title     = {Explaining {CLIP} Zero-shot Predictions Through Concepts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31336--31345}
}
From Pairs to Sequences: Track-Aware Policy Gradients for Keypoint Detection: Yepeng Liu,

Hao Li,

Liwen Yang,

Fangzhen Li,

Xudi Ge,

Yuliang Gu,

Kuang Gao,

Bing Wang,

Guang Chen,

Hangjun Ye,

Yongchao Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yepeng and Li, Hao and Yang, Liwen and Li, Fangzhen and Ge, Xudi and Gu, Yuliang and Gao, Kuang and Wang, Bing and Chen, Guang and Ye, Hangjun and Xu, Yongchao},
  title     = {From Pairs to Sequences: Track-Aware Policy Gradients for Keypoint Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21453--21463}
}
SToRe3D: Sparse Token Relevance in ViTs for Efficient Multi-View 3D Object Detection: Sandro Papais,

Lezhou Feng,

Charles Cossette,

Lingting Ge; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Papais_2026_CVPR,
  author    = {Papais, Sandro and Feng, Lezhou and Cossette, Charles and Ge, Lingting},
  title     = {{SToRe3D}: Sparse Token Relevance in {ViTs} for Efficient Multi-View {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40751--40761}
}
Consensus vs. Controversy: Mapping the Decision Space Where Architectures Diverge: Minhyeok Lee; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Minhyeok},
  title     = {Consensus vs. Controversy: Mapping the Decision Space Where Architectures Diverge},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34566--34574}
}
VisionLeaf: Entropy-Guided Leaf-First Reasoning for Efficient and Accurate Think-with-Image: Haokun Gui,

Senqiao Yang,

Mingkang Zhu,

Meng Chu,

Sitong Wu,

Changsheng Lu,

Zihao Wang,

Zhuotao Tian,

Jiaya Jia; [pdf] [supp]
[bibtex]
@InProceedings{Gui_2026_CVPR,
  author    = {Gui, Haokun and Yang, Senqiao and Zhu, Mingkang and Chu, Meng and Wu, Sitong and Lu, Changsheng and Wang, Zihao and Tian, Zhuotao and Jia, Jiaya},
  title     = {{VisionLeaf}: Entropy-Guided Leaf-First Reasoning for Efficient and Accurate Think-with-Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5187--5198}
}
SIR: Structured Image Representations for Explainable Robot Learning: Paul Mattes,

Jan Schwab,

Jens Bosch,

Maximilian Xiling Li,

Nils Blank,

Minh-Trung Tang,

Moritz Haberland,

Rudolf Lioutikov; [pdf] [supp]
[bibtex]
@InProceedings{Mattes_2026_CVPR,
  author    = {Mattes, Paul and Schwab, Jan and Bosch, Jens and Li, Maximilian Xiling and Blank, Nils and Tang, Minh-Trung and Haberland, Moritz and Lioutikov, Rudolf},
  title     = {{SIR}: Structured Image Representations for Explainable Robot Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42484--42493}
}
Unified Generation and Self-Verification for Vision-Language Models via Advantage Decoupled Preference Optimization: Xinyu Qiu,

Heng Jia,

Zhengwen Zeng,

Shuheng Shen,

Changhua Meng,

Yi Yang,

Linchao Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Xinyu and Jia, Heng and Zeng, Zhengwen and Shen, Shuheng and Meng, Changhua and Yang, Yi and Zhu, Linchao},
  title     = {Unified Generation and Self-Verification for Vision-Language Models via Advantage Decoupled Preference Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36916--36925}
}
Towards Calibrating Prompt Tuning of Vision-Language Models: Ashshak Sharifdeen,

Fahad Shamshad,

Muhammad Akhtar Munir,

Abhishek Basu,

Mohamed Ismithdeen,

Jeyapriyan Jeyamohan,

Chathurika Silva,

Karthik Nandakumar,

Muhammad Haris Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sharifdeen_2026_CVPR,
  author    = {Sharifdeen, Ashshak and Shamshad, Fahad and Munir, Muhammad Akhtar and Basu, Abhishek and Ismithdeen, Mohamed and Jeyamohan, Jeyapriyan and Silva, Chathurika and Nandakumar, Karthik and Khan, Muhammad Haris},
  title     = {Towards Calibrating Prompt Tuning of Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39131--39140}
}
Do Less, Achieve More: Do We Need Every-Step Optimization for RL Fine-tuning of Diffusion Models?: Renye Yan,

Jikang Cheng,

Shikun Sun,

Yi Sun,

You Wu,

Wei Peng,

Zongwei Wang,

Ling Liang,

Junliang Xing,

Yimao Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Renye and Cheng, Jikang and Sun, Shikun and Sun, Yi and Wu, You and Peng, Wei and Wang, Zongwei and Liang, Ling and Xing, Junliang and Cai, Yimao},
  title     = {Do Less, Achieve More: Do We Need Every-Step Optimization for {RL} Fine-tuning of Diffusion Models?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16561--16571}
}
EgoX: Egocentric Video Generation from a Single Exocentric Video: Taewoong Kang,

Kinam Kim,

Dohyeon Kim,

Minho Park,

Junha Hyung,

Jaegul Choo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Taewoong and Kim, Kinam and Kim, Dohyeon and Park, Minho and Hyung, Junha and Choo, Jaegul},
  title     = {{EgoX}: Egocentric Video Generation from a Single Exocentric Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11116--11126}
}
Rethinking Glyph Spatial Information in Font Generation: Peng Su,

Xi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Su_2026_CVPR,
  author    = {Su, Peng and Yang, Xi},
  title     = {Rethinking Glyph Spatial Information in Font Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29190--29199}
}
Synthetic Curriculum Reinforces Compositional Text-to-Image Generation: Shijian Wang,

Runhao Fu,

Siyi Zhao,

Qingqin Zhan,

Xingjian Wang,

Jiarui Jin,

Yuan Lu,

Hanqian Wu,

Cunjian Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shijian and Fu, Runhao and Zhao, Siyi and Zhan, Qingqin and Wang, Xingjian and Jin, Jiarui and Lu, Yuan and Wu, Hanqian and Chen, Cunjian},
  title     = {Synthetic Curriculum Reinforces Compositional Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21953--21963}
}
Revisiting Multimodal KV Cache Compression: A Frequency-Domain-Guided Outlier-KV-Aware Approach: Yaoxin Yang,

Peng Ye,

Xudong Tan,

Chongjun Tu,

Maosen Zhao,

Jia Hao,

Tao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yaoxin and Ye, Peng and Tan, Xudong and Tu, Chongjun and Zhao, Maosen and Hao, Jia and Chen, Tao},
  title     = {Revisiting Multimodal {KV} Cache Compression: A Frequency-Domain-Guided Outlier-{KV}-Aware Approach},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39550--39560}
}
Opti-NeuS: Neural Reconstruction for Dual-Layered Transparent and Opaque Objects: Yi Yang,

Gaoyang Zhang,

Jun Tan,

Xinguo Liu; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yi and Zhang, Gaoyang and Tan, Jun and Liu, Xinguo},
  title     = {{Opti-NeuS}: Neural Reconstruction for Dual-Layered Transparent and Opaque Objects},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22594--22603}
}
Dynamic Important Example Mining for Reinforcement Finetuning: Haoru Tan,

Sitong Wu,

Yanfeng Chen,

Shizhen Zhao,

Yang-Tian Sun,

Tianjia Liu,

Chirui Chang,

Shaofeng Zhang,

Samm Sun,

Xiuzhe Wu,

Ruobing Xie,

Xiaojuan Qi; [pdf]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Haoru and Wu, Sitong and Chen, Yanfeng and Zhao, Shizhen and Sun, Yang-Tian and Liu, Tianjia and Chang, Chirui and Zhang, Shaofeng and Sun, Samm and Wu, Xiuzhe and Xie, Ruobing and Qi, Xiaojuan},
  title     = {Dynamic Important Example Mining for Reinforcement Finetuning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41456--41466}
}
SPOT: Spatiotemporal Prompt Optimization for Motion-Stabilized MLLM-Guided Video Segmentation: Jiayi Fan,

Zheyun Qin,

Xiaoming Xi,

Xiushan Nie,

Yilong Yin; [pdf]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Jiayi and Qin, Zheyun and Xi, Xiaoming and Nie, Xiushan and Yin, Yilong},
  title     = {{SPOT}: Spatiotemporal Prompt Optimization for Motion-Stabilized {MLLM}-Guided Video Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32236--32245}
}
The Missing GAP: From Solving Square Jigsaw Puzzles to Handling Real World Archaeological Fragments: Ofir Itzhak Shahar,

Gur Elkin,

Ohad Ben-Shahar; [pdf] [supp]
[bibtex]
@InProceedings{Shahar_2026_CVPR,
  author    = {Shahar, Ofir Itzhak and Elkin, Gur and Ben-Shahar, Ohad},
  title     = {The Missing {GAP}: From Solving Square Jigsaw Puzzles to Handling Real World Archaeological Fragments},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3186--3196}
}
COT-FM: Cluster-wise Optimal Transport Flow Matching: Chiensheng Chiang,

Kuan-Hsun Tu,

Jia-Wei Liao,

Cheng-Fu Chou,

Tsung-Wei Ke; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chiang_2026_CVPR,
  author    = {Chiang, Chiensheng and Tu, Kuan-Hsun and Liao, Jia-Wei and Chou, Cheng-Fu and Ke, Tsung-Wei},
  title     = {{COT-FM}: Cluster-wise Optimal Transport Flow Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11515--11524}
}
KASALv2: Fully Automatic 3D Rotational Symmetry Classification and Axis Localization: Mengxin Zhang,

Yulin Wang,

Chen Luo,

Yongzhe Li,

Yijun Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Mengxin and Wang, Yulin and Luo, Chen and Li, Yongzhe and Zhou, Yijun},
  title     = {{KASALv2}: Fully Automatic {3D} Rotational Symmetry Classification and Axis Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13866--13875}
}
Perceptual-Evidence Anchored Reinforced Learning for Multimodal Reasoning: Chi Zhang,

Haibo Qiu,

Qiming Zhang,

Yufei Xu,

Zhixiong Zeng,

Siqi Yang,

Peng Shi,

Lin Ma,

Jing Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chi and Qiu, Haibo and Zhang, Qiming and Xu, Yufei and Zeng, Zhixiong and Yang, Siqi and Shi, Peng and Ma, Lin and Zhang, Jing},
  title     = {Perceptual-Evidence Anchored Reinforced Learning for Multimodal Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41111--41120}
}
Drive My Way: Preference Alignment of Vision-Language-Action Model for Personalized Driving: Zehao Wang,

Huaide Jiang,

Shuaiwu Dong,

Yuping Wang,

Hang Qiu,

Jiachen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zehao and Jiang, Huaide and Dong, Shuaiwu and Wang, Yuping and Qiu, Hang and Li, Jiachen},
  title     = {Drive My Way: Preference Alignment of Vision-Language-Action Model for Personalized Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25204--25214}
}
EventGait: Towards Robust Gait Recognition with Event Streams: Senyan Xu,

Shuai Chen,

Chuanfu Shen,

Kean Liu,

Zhijing Sun,

Chengzhi Cao,

Xueyang Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Senyan and Chen, Shuai and Shen, Chuanfu and Liu, Kean and Sun, Zhijing and Cao, Chengzhi and Fu, Xueyang},
  title     = {{EventGait}: Towards Robust Gait Recognition with Event Streams},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22323--22334}
}
LF-BVN: Blind-View Network for Self-Supervised Light Field Denoising: Longzhao Guo,

Shuo Zhang,

Chen Gao,

Qian Tian,

Youfang Lin; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Longzhao and Zhang, Shuo and Gao, Chen and Tian, Qian and Lin, Youfang},
  title     = {{LF-BVN}: Blind-View Network for Self-Supervised Light Field Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1355--1364}
}
Live Interactive Training for Video Segmentation: Xinyu Yang,

Haozheng Yu,

Yihong Sun,

Bharath Hariharan,

Jennifer J. Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xinyu and Yu, Haozheng and Sun, Yihong and Hariharan, Bharath and Sun, Jennifer J.},
  title     = {Live Interactive Training for Video Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39827--39837}
}
Learning Long-term Motion Embeddings for Efficient Kinematics Generation: Nick Stracke,

Kolja Bauer,

Stefan Andreas Baumann,

Miguel Ángel Bautista,

Josh Susskind,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Stracke_2026_CVPR,
  author    = {Stracke, Nick and Bauer, Kolja and Baumann, Stefan Andreas and Bautista, Miguel {\'A}ngel and Susskind, Josh and Ommer, Bj{\"o}rn},
  title     = {Learning Long-term Motion Embeddings for Efficient Kinematics Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42581--42591}
}
Test-time Sparsity for Extreme Fast Action Diffusion: Kangye Ji,

Yuan Meng,

Jianbo Zhou,

Ye Li,

Chen Tang,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2026_CVPR,
  author    = {Ji, Kangye and Meng, Yuan and Zhou, Jianbo and Li, Ye and Tang, Chen and Wang, Zhi},
  title     = {Test-time Sparsity for Extreme Fast Action Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9414--9423}
}
MedFG-VQA: Low-Frequency Memory and Graph Attention for Lightweight Medical VQA: Haowen Gu,

Gensheng Pei,

Zeren Sun,

Mingwu Ren,

Xiangbo Shu,

Yazhou Yao,

Fumin Shen; [pdf] [supp]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Haowen and Pei, Gensheng and Sun, Zeren and Ren, Mingwu and Shu, Xiangbo and Yao, Yazhou and Shen, Fumin},
  title     = {{MedFG-VQA}: Low-Frequency Memory and Graph Attention for Lightweight Medical {VQA}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42755--42764}
}
FastRef: Fast Prototype Refinement for Few-shot Industrial Anomaly Detection: Yufei Li,

Long Tian,

Yuyang Dai,

Wenchao Chen,

Liang Bao,

Xiyang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yufei and Tian, Long and Dai, Yuyang and Chen, Wenchao and Bao, Liang and Liu, Xiyang},
  title     = {{FastRef}: Fast Prototype Refinement for Few-shot Industrial Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43040--43049}
}
SaPaVe: Towards Active Perception and Manipulation in Vision-Language Action Models for Robotics: Mengzhen Liu,

Enshen Zhou,

Cheng Chi,

Yi Han,

Shanyu Rong,

Liming Chen,

Pengwei Wang,

Zhongyuan Wang,

Shanghang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Mengzhen and Zhou, Enshen and Chi, Cheng and Han, Yi and Rong, Shanyu and Chen, Liming and Wang, Pengwei and Wang, Zhongyuan and Zhang, Shanghang},
  title     = {{SaPaVe}: Towards Active Perception and Manipulation in Vision-Language Action Models for Robotics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37164--37174}
}
Understanding Task Transfer in Vision-Language Models: Bhuvan Sachdeva,

Karan Uppal,

Abhinav Java,

Vineeth N. Balasubramanian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sachdeva_2026_CVPR,
  author    = {Sachdeva, Bhuvan and Uppal, Karan and Java, Abhinav and Balasubramanian, Vineeth N.},
  title     = {Understanding Task Transfer in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28754--28763}
}
The Universal Normal Embedding: Chen Tasker,

Roy Betser,

Eyal Gofer,

Meir Yossef Levi,

Guy Gilboa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tasker_2026_CVPR,
  author    = {Tasker, Chen and Betser, Roy and Gofer, Eyal and Levi, Meir Yossef and Gilboa, Guy},
  title     = {The Universal Normal Embedding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32015--32025}
}
Look Before You Fuse: 2D-Guided Cross-Modal Alignment for Robust 3D Detection: Xiang Li,

Zhangchi Hu,

Xu Xiao,

Bin Kong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xiang and Hu, Zhangchi and Xiao, Xu and Kong, Bin},
  title     = {Look Before You Fuse: {2D}-Guided Cross-Modal Alignment for Robust {3D} Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11589--11598},
}
Rethinking SNN Online Training and Deployment: Gradient-Coherent Learning via Hybrid-Driven LIF Model: Zecheng Hao,

Yifan Huang,

Zijie Xu,

Wenxuan Liu,

Yuanhong Tang,

Zhaofei Yu,

Tiejun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hao_2026_CVPR,
  author    = {Hao, Zecheng and Huang, Yifan and Xu, Zijie and Liu, Wenxuan and Tang, Yuanhong and Yu, Zhaofei and Huang, Tiejun},
  title     = {Rethinking {SNN} Online Training and Deployment: Gradient-Coherent Learning via Hybrid-Driven {LIF} Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20202--20211},
}
The devil is in the details: Enhancing Video Virtual Try-On via Keyframe-Driven Details Injection: Qingdong He,

Xueqin Chen,

Yanjie Pan,

Peng Tang,

Pengcheng Xu,

Zhenye Gan,

Chengjie Wang,

Xiaobin Hu,

Jiangning Zhang,

Yabiao Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Qingdong and Chen, Xueqin and Pan, Yanjie and Tang, Peng and Xu, Pengcheng and Gan, Zhenye and Wang, Chengjie and Hu, Xiaobin and Zhang, Jiangning and Wang, Yabiao},
  title     = {The devil is in the details: Enhancing Video Virtual Try-On via Keyframe-Driven Details Injection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9182--9191},
}
PFGNet: A Fully Convolutional Frequency-Guided Peripheral Gating Network for Efficient Spatiotemporal Predictive Learning: Xinyong Cai,

Changbin Sun,

Yong Wang,

Hongyu Yang,

Yuankai Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Xinyong and Sun, Changbin and Wang, Yong and Yang, Hongyu and Wu, Yuankai},
  title     = {{PFGNet}: A Fully Convolutional Frequency-Guided Peripheral Gating Network for Efficient Spatiotemporal Predictive Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38848--38858},
}
MMTIT-Bench: A Multilingual and Multi-Scenario Benchmark with Cognition-Perception-Reasoning Guided Text-Image Machine Translation: Gengluo Li,

Chengquan Zhang,

Yupu Liang,

Huawen Shen,

Yaping Zhang,

Pengyuan Lyu,

Weinong Wang,

Xingyu Wan,

Gangyan Zeng,

Han Hu,

Can Ma,

Yu Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Gengluo and Zhang, Chengquan and Liang, Yupu and Shen, Huawen and Zhang, Yaping and Lyu, Pengyuan and Wang, Weinong and Wan, Xingyu and Zeng, Gangyan and Hu, Han and Ma, Can and Zhou, Yu},
  title     = {{MMTIT-Bench}: A Multilingual and Multi-Scenario Benchmark with Cognition-Perception-Reasoning Guided Text-Image Machine Translation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16593--16602},
}
Interact2Ar: Full-Body Human-Human Interaction Generation via Autoregressive Diffusion Models: Pablo Ruiz-Ponce,

Sergio Escalera,

José García-Rodríguez,

Jiankang Deng,

Rolandos Alexandros Potamias; [pdf] [supp]
[bibtex]
@InProceedings{Ruiz-Ponce_2026_CVPR,
  author    = {Ruiz-Ponce, Pablo and Escalera, Sergio and Garc{\'\i}a-Rodr{\'\i}guez, Jos{\'e} and Deng, Jiankang and Potamias, Rolandos Alexandros},
  title     = {{Interact2Ar}: Full-Body Human-Human Interaction Generation via Autoregressive Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23559--23569},
}
ChronoGS: Disentangling Invariants and Changes in Multi-Period Scenes: Zhongtao Wang,

Jiaqi Dai,

Qingtian Zhu,

Yilong Li,

Mai Su,

Fei Zhu,

Meng Gai,

Shaorong Wang,

Chengwei Pan,

Yisong Chen,

Guoping Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhongtao and Dai, Jiaqi and Zhu, Qingtian and Li, Yilong and Su, Mai and Zhu, Fei and Gai, Meng and Wang, Shaorong and Pan, Chengwei and Chen, Yisong and Wang, Guoping},
  title     = {{ChronoGS}: Disentangling Invariants and Changes in Multi-Period Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8298--8307},
}
Progress-Think: Semantic Progress Reasoning for Vision-Language Navigation: Shuo Wang,

Yucheng Wang,

Guoxin Lian,

Yongcai Wang,

Maiyue Chen,

Kaihui Wang,

Bo Zhang,

Zhizhong Su,

Yutian Zhou,

Wanting Li,

Deying Li,

Zhaoxin Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shuo and Wang, Yucheng and Lian, Guoxin and Wang, Yongcai and Chen, Maiyue and Wang, Kaihui and Zhang, Bo and Su, Zhizhong and Zhou, Yutian and Li, Wanting and Li, Deying and Fan, Zhaoxin},
  title     = {{Progress-Think}: Semantic Progress Reasoning for Vision-Language Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4076--4086},
}
PRIMU: Uncertainty Estimation for Novel Views in Gaussian Splatting from Primitive-Based Representations of Error and Coverage: Thomas Gottwald,

Edgar Heinert,

Peter Stehr,

Chamuditha Jayanga Galappaththige,

Matthias Rottmann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gottwald_2026_CVPR,
  author    = {Gottwald, Thomas and Heinert, Edgar and Stehr, Peter and Galappaththige, Chamuditha Jayanga and Rottmann, Matthias},
  title     = {{PRIMU}: Uncertainty Estimation for Novel Views in {Gaussian} Splatting from Primitive-Based Representations of Error and Coverage},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11871--11880},
}
A Supervised Multi-task Framework for Joint cryo-ET Restoration Enabled by Generative Physical Simulation: Xinsheng Wang,

Zhidong Yang,

Xiaohua Wan,

Renmin Han,

Shuai Tang,

Hao Dong,

Fa Zhang,

Bin Hu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xinsheng and Yang, Zhidong and Wan, Xiaohua and Han, Renmin and Tang, Shuai and Dong, Hao and Zhang, Fa and Hu, Bin},
  title     = {A Supervised Multi-task Framework for Joint {cryo-ET} Restoration Enabled by Generative Physical Simulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21057--21066},
}
AssemblyBench: Physics-Aware Assembly of Complex Industrial Objects: Danrui Li,

Jiahao Zhang,

Bernhard Egger,

Moitreya Chatterjee,

Suhas Lohit,

Tim K. Marks,

Anoop Cherian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Danrui and Zhang, Jiahao and Egger, Bernhard and Chatterjee, Moitreya and Lohit, Suhas and Marks, Tim K. and Cherian, Anoop},
  title     = {{AssemblyBench}: Physics-Aware Assembly of Complex Industrial Objects},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17326--17335},
}
RoSAMDepth: Robust Self-supervised Depth Estimation Leveraging Segment Anything Model: Xuanang Gao,

Zhiwei Ning,

Gengming Zhang,

Jiaxi Cao,

Runze Yang,

Zhonglong Zheng,

Jie Yang,

Rong Xiao,

Wei Liu; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Xuanang and Ning, Zhiwei and Zhang, Gengming and Cao, Jiaxi and Yang, Runze and Zheng, Zhonglong and Yang, Jie and Xiao, Rong and Liu, Wei},
  title     = {{RoSAMDepth}: Robust Self-supervised Depth Estimation Leveraging {Segment Anything Model}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34138--34147},
}
IVAAN: Instance-level Vision-Language Alignment via Attribute-Guided Text Prompts Generation for Nuclei Analysis: Jaehoon Jeong,

Yi Hu,

Soopil Kim,

Jongseong Jang,

Soonyoung Lee,

Sang Hyun Park; [pdf] [supp]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Jaehoon and Hu, Yi and Kim, Soopil and Jang, Jongseong and Lee, Soonyoung and Park, Sang Hyun},
  title     = {{IVAAN}: Instance-level Vision-Language Alignment via Attribute-Guided Text Prompts Generation for Nuclei Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29305--29314},
}
FedAlign: Differentially Private Distribution Alignment for Non-IID Federated Learning: Peng Wu,

Jiapeng Zhang,

Yingjie Song,

Xiong Xiao,

Zhuo Tang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Peng and Zhang, Jiapeng and Song, Yingjie and Xiao, Xiong and Tang, Zhuo},
  title     = {{FedAlign}: Differentially Private Distribution Alignment for Non-{IID} Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31769--31778},
}
SPAR: Single-Pass Any-Resolution ViT for Open-vocabulary Segmentation: Naomi Kombol,

Ivan Martinović,

Siniša Šegvić,

Giorgos Tolias; [pdf] [supp]
[bibtex]
@InProceedings{Kombol_2026_CVPR,
  author    = {Kombol, Naomi and Martinovi{\'c}, Ivan and {\v{S}}egvi{\'c}, Sini{\v{s}}a and Tolias, Giorgos},
  title     = {{SPAR}: Single-Pass Any-Resolution {ViT} for Open-vocabulary Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27579--27589},
}
NeoVerse: Enhancing 4D World Model with in-the-wild Monocular Videos: Yuxue Yang,

Lue Fan,

Ziqi Shi,

Junran Peng,

Feng Wang,

Zhaoxiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yuxue and Fan, Lue and Shi, Ziqi and Peng, Junran and Wang, Feng and Zhang, Zhaoxiang},
  title     = {{NeoVerse}: Enhancing {4D} World Model with in-the-wild Monocular Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40340--40351},
}
PyramidalWan: On Making Pretrained Video Model Pyramidal for Efficient Inference: Denis Korzhenkov,

Adil Karjauv,

Animesh Karnewar,

Mohsen Ghafoorian,

Amirhossein Habibian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Korzhenkov_2026_CVPR,
  author    = {Korzhenkov, Denis and Karjauv, Adil and Karnewar, Animesh and Ghafoorian, Mohsen and Habibian, Amirhossein},
  title     = {{PyramidalWan}: On Making Pretrained Video Model Pyramidal for Efficient Inference},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16153--16162},
}
Revisiting Model Stitching In the Foundation Model Era: Zheda Mai,

Ke Zhang,

Fu-En Wang,

Zixiao Ken Wang,

Albert Y. C. Chen,

Lu Xia,

Min Sun,

Wei-Lun Chao,

Cheng-Hao Kuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mai_2026_CVPR,
  author    = {Mai, Zheda and Zhang, Ke and Wang, Fu-En and Wang, Zixiao Ken and Chen, Albert Y. C. and Xia, Lu and Sun, Min and Chao, Wei-Lun and Kuo, Cheng-Hao},
  title     = {Revisiting Model Stitching In the Foundation Model Era},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41342--41351},
}
Simple but Effective Triplet-Based Compression Strategies for Compact Visual Localization: Torsten Sattler,

Zuzana Kukelova; [pdf] [supp]
[bibtex]
@InProceedings{Sattler_2026_CVPR,
  author    = {Sattler, Torsten and Kukelova, Zuzana},
  title     = {Simple but Effective Triplet-Based Compression Strategies for Compact Visual Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29047--29059},
}
Nestwork: Conditional 3D Furnished House Layout Generation through Latent Heterogeneous Graph Diffusion: Shuhan Miao,

Biru Cao,

Junling Zhuang; [pdf] [supp]
[bibtex]
@InProceedings{Miao_2026_CVPR,
  author    = {Miao, Shuhan and Cao, Biru and Zhuang, Junling},
  title     = {Nestwork: Conditional {3D} Furnished House Layout Generation through Latent Heterogeneous Graph Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27093--27103},
}
Generalizable Co-Salient Object Detection via Mixed Content-Style Modulation: Guanting Guo,

Shenglong Hu,

Kaihua Zhang,

Guangcan Liu,

Min Xia; [pdf]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Guanting and Hu, Shenglong and Zhang, Kaihua and Liu, Guangcan and Xia, Min},
  title     = {Generalizable Co-Salient Object Detection via Mixed Content-Style Modulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32173--32183},
}
SHAPE: Structure-aware Hierarchical Unsupervised Domain Adaptation with Plausibility Evaluation for Medical Image Segmentation: Linkuan Zhou,

Yinghao Xia,

Yufei Shen,

Xiangyu Li,

Wenjie Du,

Cong Cong,

Leyi Wei,

Ran Su,

Qiangguo Jin; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Linkuan and Xia, Yinghao and Shen, Yufei and Li, Xiangyu and Du, Wenjie and Cong, Cong and Wei, Leyi and Su, Ran and Jin, Qiangguo},
  title     = {{SHAPE}: Structure-aware Hierarchical Unsupervised Domain Adaptation with Plausibility Evaluation for Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30000--30010},
}
Region-Aware Instance Consistency Learning for Micro-Expression Recognition: Yaomin Cai,

C. L. Philip Chen,

Shiting Xu,

Haiqi Liu,

Tong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Yaomin and Chen, C. L. Philip and Xu, Shiting and Liu, Haiqi and Zhang, Tong},
  title     = {Region-Aware Instance Consistency Learning for Micro-Expression Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34428--34438},
}
Seeing the Scene Matters: Revealing Forgetting in Video Understanding Models with a Scene-Aware Long-Video Benchmark: Seng Nam Chen,

Hao Chen,

Chenglam Ho,

Xinyu Mao,

Jinping Wang,

Yu Zhang,

Chao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Seng Nam and Chen, Hao and Ho, Chenglam and Mao, Xinyu and Wang, Jinping and Zhang, Yu and Li, Chao},
  title     = {Seeing the Scene Matters: Revealing Forgetting in Video Understanding Models with a Scene-Aware Long-Video Benchmark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4515--4525},
}
Lafite: A Generative Latent Field for 3D Native Texturing: Chia-Hao Chen,

Yuan-Chen Guo,

Zi-Xin Zou,

Ze Yuan,

Guan Luo,

Xiaojuan Qi,

Ding Liang,

Yan-Pei Cao,

Song-Hai Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Chia-Hao and Guo, Yuan-Chen and Zou, Zi-Xin and Yuan, Ze and Luo, Guan and Qi, Xiaojuan and Liang, Ding and Cao, Yan-Pei and Zhang, Song-Hai},
  title     = {Lafite: A Generative Latent Field for {3D} Native Texturing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19960--19971},
}
Obstruction Reasoning for Robotic Grasping: Runyu Jiao,

Matteo Bortolon,

Francesco Giuliari,

Alice Fasoli,

Sergio Povoli,

Guofeng Mei,

Yiming Wang,

Fabio Poiesi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiao_2026_CVPR,
  author    = {Jiao, Runyu and Bortolon, Matteo and Giuliari, Francesco and Fasoli, Alice and Povoli, Sergio and Mei, Guofeng and Wang, Yiming and Poiesi, Fabio},
  title     = {Obstruction Reasoning for Robotic Grasping},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20755--20764},
}
Beyond Multiple Choice: Verifiable OpenQA for Robust Vision-Language RFT: Yesheng Liu,

Hao Li,

Haiyu Xu,

Baoqi Pei,

Jiahao Wang,

Mingxuan Zhao,

Jing-Shu Zheng,

Zheqi He,

JG Yao,

Xi Yang,

Bowen Qin,

Jiajun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yesheng and Li, Hao and Xu, Haiyu and Pei, Baoqi and Wang, Jiahao and Zhao, Mingxuan and Zheng, Jing-Shu and He, Zheqi and Yao, JG and Yang, Xi and Qin, Bowen and Zhang, Jiajun},
  title     = {Beyond Multiple Choice: Verifiable {OpenQA} for Robust Vision-Language {RFT}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18859--18869},
}
PartDiffuser: Part-wise 3D Mesh Generation via Discrete Diffusion: Yichen Yang,

Hong Li,

Haodong Zhu,

Linin Yang,

Guojun Lei,

Sheng Xu,

Baochang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yichen and Li, Hong and Zhu, Haodong and Yang, Linin and Lei, Guojun and Xu, Sheng and Zhang, Baochang},
  title     = {{PartDiffuser}: Part-wise {3D} Mesh Generation via Discrete Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19940--19949},
}
Sampling-Aware Quantization for Diffusion Models: Qian Zeng,

Jie Song,

Yuanyu Wan,

Huiqiong Wang,

Mingli Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Qian and Song, Jie and Wan, Yuanyu and Wang, Huiqiong and Song, Mingli},
  title     = {Sampling-Aware Quantization for Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35831--35840},
}
AdvFM: Lookahead Flow-Matching Velocity-Field Attacks for Imperceptible and Transferable Adversarial Examples: Runze Liu,

Zeyue Wang,

Fanghui Sun,

Rui Liu,

Yihan Yan,

Shen Wang,

Zhaoyang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Runze and Wang, Zeyue and Sun, Fanghui and Liu, Rui and Yan, Yihan and Wang, Shen and Zhang, Zhaoyang},
  title     = {{AdvFM}: Lookahead Flow-Matching Velocity-Field Attacks for Imperceptible and Transferable Adversarial Examples},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42290--42299},
}
MLLMSplat: A 2D MLLM-Powered Framework for 3D Gaussian Splatting Understanding, Generation, and Editing: Jingqiao Xiu,

Can Wang,

Dong Xu; [pdf] [supp]
[bibtex]
@InProceedings{Xiu_2026_CVPR,
  author    = {Xiu, Jingqiao and Wang, Can and Xu, Dong},
  title     = {{MLLMSplat}: A {2D} {MLLM}-Powered Framework for {3D} {Gaussian} Splatting Understanding, Generation, and Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33301--33311},
}
Plenoptic Video Generation: Xiao Fu,

Shitao Tang,

Min Shi,

Xian Liu,

Jinwei Gu,

Ming-Yu Liu,

Dahua Lin,

Chen-Hsuan Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Xiao and Tang, Shitao and Shi, Min and Liu, Xian and Gu, Jinwei and Liu, Ming-Yu and Lin, Dahua and Lin, Chen-Hsuan},
  title     = {Plenoptic Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16142--16152},
}
CraftMesh: High-Fidelity Generative Mesh Manipulation via Poisson Seamless Fusion: James Jincheng Hu,

Yuxiao Wu,

Youcheng Cai,

Ligang Liu; [pdf]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, James Jincheng and Wu, Yuxiao and Cai, Youcheng and Liu, Ligang},
  title     = {{CraftMesh}: High-Fidelity Generative Mesh Manipulation via {Poisson} Seamless Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5934--5944},
}
Label What Matters: Modality-Balanced and Difficulty-Aware Multimodal Active Learning: Yuqiao Zeng,

Xu Wang,

Tengfei Liang,

Yiqing Hao,

Yi Jin,

Hui Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Yuqiao and Wang, Xu and Liang, Tengfei and Hao, Yiqing and Jin, Yi and Yu, Hui},
  title     = {Label What Matters: Modality-Balanced and Difficulty-Aware Multimodal Active Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29391--29399},
}
D-Convexity: A Unified Differentiable Convex Shape Prior via Quasi-Concavity for Data-driven Image Segmentation: Shengzhe Chen,

Hao Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Shengzhe and Yan, Hao},
  title     = {{D-Convexity}: A Unified Differentiable Convex Shape Prior via Quasi-Concavity for Data-driven Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34755--34764},
}
ACPV-Net: All-Class Polygonal Vectorization for Seamless Vector Map Generation from Aerial Imagery: Weiqin Jiao,

Hao Cheng,

George Vosselman,

Claudio Persello; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiao_2026_CVPR,
  author    = {Jiao, Weiqin and Cheng, Hao and Vosselman, George and Persello, Claudio},
  title     = {{ACPV-Net}: All-Class Polygonal Vectorization for Seamless Vector Map Generation from Aerial Imagery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13244--13253},
}
Fighting Hallucinations with Counterfactuals: Diffusion-Guided Perturbations for LVLM Hallucination Suppression: Hamidreza Dastmalchi,

Aijun An,

Ali Cheraghian,

Hamed Barzamini; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dastmalchi_2026_CVPR,
  author    = {Dastmalchi, Hamidreza and An, Aijun and Cheraghian, Ali and Barzamini, Hamed},
  title     = {Fighting Hallucinations with Counterfactuals: Diffusion-Guided Perturbations for {LVLM} Hallucination Suppression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4178--4187},
}
PromptStereo: Zero-Shot Stereo Matching via Structure and Motion Prompts: Xianqi Wang,

Hao Yang,

Hangtian Wang,

Junda Cheng,

Gangwei Xu,

Min Lin,

Xin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xianqi and Yang, Hao and Wang, Hangtian and Cheng, Junda and Xu, Gangwei and Lin, Min and Yang, Xin},
  title     = {{PromptStereo}: Zero-Shot Stereo Matching via Structure and Motion Prompts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12565--12575},
}
Uncertainty-Aware Exploratory Direct Preference Optimization for Multimodal Large Language Models: Huatian Zhang,

Zhendong Mao,

Lei Zhang,

Yongdong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Huatian and Mao, Zhendong and Zhang, Lei and Zhang, Yongdong},
  title     = {Uncertainty-Aware Exploratory Direct Preference Optimization for Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37831--37841},
}
Calibri: Enhancing Diffusion Transformers via Parameter-Efficient Calibration: Danil Tokhchukov,

Aysel Mirzoeva,

Andrey Kuznetsov,

Konstantin Sobolev; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tokhchukov_2026_CVPR,
  author    = {Tokhchukov, Danil and Mirzoeva, Aysel and Kuznetsov, Andrey and Sobolev, Konstantin},
  title     = {Calibri: Enhancing Diffusion Transformers via Parameter-Efficient Calibration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4635--4644},
}
AT-VLA: Adaptive Tactile Injection for Enhanced Feedback Reaction in Vision-Language-Action Models: Xiaoqi Li,

Muhe Cai,

Jiadong Xu,

Juan Zhu,

Hongwei Fan,

Yan Shen,

Guangrui Ren,

Hao Dong; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xiaoqi and Cai, Muhe and Xu, Jiadong and Zhu, Juan and Fan, Hongwei and Shen, Yan and Ren, Guangrui and Dong, Hao},
  title     = {{AT-VLA}: Adaptive Tactile Injection for Enhanced Feedback Reaction in Vision-Language-Action Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28764--28774},
}
Seeing through Light and Darkness: Sensor-Physics Grounded Deblurring HDR NeRF from Single-Exposure Images and Events: Yunshan Qi,

Lin Zhu,

Nan Bao,

Yifan Zhao,

Jia Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Yunshan and Zhu, Lin and Bao, Nan and Zhao, Yifan and Li, Jia},
  title     = {Seeing through Light and Darkness: Sensor-Physics Grounded Deblurring {HDR} {NeRF} from Single-Exposure Images and Events},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22723--22732},
}
SAGE: Training Smart Any-Horizon Agents for Long Video Reasoning with Reinforcement Learning: Jitesh Jain,

Allen AI blank,

Jialuo Li,

Zixian Ma,

Jieyu Zhang,

Chris Dongjoo Kim,

Sangho Lee,

Rohun Tripathi,

Tanmay Gupta,

Christopher Clark,

Humphrey Shi; [pdf] [supp]
[bibtex]
% NOTE(review): the second author below, "blank, Allen AI", looks like a
% submission-form placeholder (an affiliation entered in the name fields)
% rather than a person. Confirm the author list against the paper PDF before
% citing; the value is kept unchanged here.
@InProceedings{Jain_2026_CVPR,
  author    = {Jain, Jitesh and blank, Allen AI and Li, Jialuo and Ma, Zixian and Zhang, Jieyu and Kim, Chris Dongjoo and Lee, Sangho and Tripathi, Rohun and Gupta, Tanmay and Clark, Christopher and Shi, Humphrey},
  title     = {{SAGE}: Training Smart Any-Horizon Agents for Long Video Reasoning with Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41478--41488},
}
AutoRegressive Generation with B-rep Holistic Token Sequence Representation: Jiahao Li,

Yunpeng Bai,

Yongkang Dai,

Hao Guo,

Hongping Gan,

Yilei Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiahao and Bai, Yunpeng and Dai, Yongkang and Guo, Hao and Gan, Hongping and Shi, Yilei},
  title     = {{AutoRegressive} Generation with {B-rep} Holistic Token Sequence Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24363--24372},
}
Is your VLM Sky-Ready? A Comprehensive Spatial Intelligence Benchmark for UAV Navigation: Lingfeng Zhang,

Yuchen Zhang,

Hongsheng Li,

Haoxiang Fu,

Yingbo Tang,

Hangjun Ye,

Long Chen,

Xiaojun Liang,

Xiaoshuai Hao,

Wenbo Ding; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Lingfeng and Zhang, Yuchen and Li, Hongsheng and Fu, Haoxiang and Tang, Yingbo and Ye, Hangjun and Chen, Long and Liang, Xiaojun and Hao, Xiaoshuai and Ding, Wenbo},
  title     = {Is your {VLM} Sky-Ready? A Comprehensive Spatial Intelligence Benchmark for {UAV} Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25904--25913},
}
Similarity-Consistent Likelihood Diffusion enables Hidden Person Detection from Wall Reflections: Zhiwen Zheng,

Hao Zhou,

Huiyu Qi,

Zhao Huang,

Guangyuan Zhang,

Shaowei Jiang,

Wenwen Tang,

Bin Yang,

Jin Liu,

Xiaoshuai Zhang,

Xingru Huang; [pdf]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Zhiwen and Zhou, Hao and Qi, Huiyu and Huang, Zhao and Zhang, Guangyuan and Jiang, Shaowei and Tang, Wenwen and Yang, Bin and Liu, Jin and Zhang, Xiaoshuai and Huang, Xingru},
  title     = {Similarity-Consistent Likelihood Diffusion enables Hidden Person Detection from Wall Reflections},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13908--13917},
}
Optical Flow Matching: Reframing Optical Flow as Continuous Transport Dynamics: Ao Luo,

Xin Li,

Fan Yang,

Yuezun Li,

Zhaoquan Yuan,

Shan Zhao,

Bing Su,

Xiao Wu; [pdf]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Ao and Li, Xin and Yang, Fan and Li, Yuezun and Yuan, Zhaoquan and Zhao, Shan and Su, Bing and Wu, Xiao},
  title     = {Optical Flow Matching: Reframing Optical Flow as Continuous Transport Dynamics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28669--28678},
}
Towards Multimodal Domain Generalization with Few Labels: Hongzhao Li,

Hao Dong,

Hualei Wan,

Shupan Li,

Mingliang Xu,

Muhammad Haris Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Hongzhao and Dong, Hao and Wan, Hualei and Li, Shupan and Xu, Mingliang and Khan, Muhammad Haris},
  title     = {Towards Multimodal Domain Generalization with Few Labels},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15070--15079},
}
Smoothing the Score Function to Enhance Generalization in Diffusion Models: Xinyu Zhou,

Jiawei Zhang,

Stephen J. Wright; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Xinyu and Zhang, Jiawei and Wright, Stephen J.},
  title     = {Smoothing the Score Function to Enhance Generalization in Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43208--43217},
}
SegMo: Co-Designing Content-Aware Sparsity and Locally-Cohesive Segment Parallelism for Efficient VLM Inference: Haojuan Li,

Ruohan Tang,

Dongzhou Cheng,

Zongpu Zhang,

Jian Li,

Jiaqi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Haojuan and Tang, Ruohan and Cheng, Dongzhou and Zhang, Zongpu and Li, Jian and Wang, Jiaqi},
  title     = {{SegMo}: Co-Designing Content-Aware Sparsity and Locally-Cohesive Segment Parallelism for Efficient {VLM} Inference},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33598--33608},
}
Reclaiming Lost Text Layers for Source-Free Cross-Domain Few-Shot Learning: Zhenyu Zhang,

Guangyao Chen,

Yixiong Zou,

Yuhua Li,

Ruixuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zhenyu and Chen, Guangyao and Zou, Yixiong and Li, Yuhua and Li, Ruixuan},
  title     = {Reclaiming Lost Text Layers for Source-Free Cross-Domain Few-Shot Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15080--15090},
}
TempoMaster: Efficient Long Video Generation via Next-Frame-Rate Prediction: Yukuo Ma,

Cong Liu,

Junke Wang,

Junqi Liu,

Haibin Huang,

Zuxuan Wu,

Chi Zhang,

Xuelong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Yukuo and Liu, Cong and Wang, Junke and Liu, Junqi and Huang, Haibin and Wu, Zuxuan and Zhang, Chi and Li, Xuelong},
  title     = {{TempoMaster}: Efficient Long Video Generation via Next-Frame-Rate Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30414--30424},
}
DTG-Restore: Training-Free Diffusion Refinement for Generative Video Super-Resolution: Hidir Yesiltepe,

Koutilya PNVR,

Gaurav Pathak,

Navaneeth Bodla,

Bharat Singh,

Pinar Yanardag,

Jinrong Xie; [pdf] [supp]
[bibtex]
@InProceedings{Yesiltepe_2026_CVPR,
  author    = {Yesiltepe, Hidir and PNVR, Koutilya and Pathak, Gaurav and Bodla, Navaneeth and Singh, Bharat and Yanardag, Pinar and Xie, Jinrong},
  title     = {{DTG-Restore}: Training-Free Diffusion Refinement for Generative Video Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23335--23344},
}
Learning from Oblivion: Predicting Knowledge-Overflowed Weights via Retrodiction of Forgetting: Jinhyeok Jang,

Jaehong Kim,

Jung Uk Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2026_CVPR,
  author    = {Jang, Jinhyeok and Kim, Jaehong and Kim, Jung Uk},
  title     = {Learning from Oblivion: Predicting Knowledge-Overflowed Weights via Retrodiction of Forgetting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39890--39900},
}
EvoID: Reinforced Evolution for Identity-Preserving Video Generation: Yiheng Zhang,

Zhaofan Qiu,

Zunxu Liu,

Yingwei Pan,

Ting Yao,

Tao Mei; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yiheng and Qiu, Zhaofan and Liu, Zunxu and Pan, Yingwei and Yao, Ting and Mei, Tao},
  title     = {{EvoID}: Reinforced Evolution for Identity-Preserving Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41870--41880},
}
CogniVerse: Revolutionizing Multi-Modal Retrieval-Augmented Generation with Cognitive Reflection and Geometric Reasoning: Xiang Fang,

Wanlong Fang,

Changshuo Wang; [pdf]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Xiang and Fang, Wanlong and Wang, Changshuo},
  title     = {{CogniVerse}: Revolutionizing Multi-Modal Retrieval-Augmented Generation with Cognitive Reflection and Geometric Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7923--7935},
}
Batman: Benign Knowledge Alignment Through Malicious Null Space in Federated Backdoor Attack: Wenwen He,

Wenke Huang,

Yiyang Fang,

Wenjie Qu,

Jiaheng Zhang,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Wenwen and Huang, Wenke and Fang, Yiyang and Qu, Wenjie and Zhang, Jiaheng and Ye, Mang},
  title     = {Batman: Benign Knowledge Alignment Through Malicious Null Space in Federated Backdoor Attack},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13316--13325},
}
Bezier Degradation Modeling for LiDAR-based Human Motion Capture: Xiaoqi An,

Lin Zhao,

Jun Li,

Chen Gong,

Jian Yang; [pdf] [supp]
[bibtex]
@InProceedings{An_2026_CVPR,
  author    = {An, Xiaoqi and Zhao, Lin and Li, Jun and Gong, Chen and Yang, Jian},
  title     = {Bezier Degradation Modeling for {LiDAR}-based Human Motion Capture},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14027--14037},
}
Enhancing Spatial Understanding in Image Generation via Reward Modeling: Zhenyu Tang,

Chaoran Feng,

Yufan Deng,

Jie Wu,

Xiaojie Li,

Rui Wang,

Yunpeng Chen,

Daquan Zhou; [pdf] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Zhenyu and Feng, Chaoran and Deng, Yufan and Wu, Jie and Li, Xiaojie and Wang, Rui and Chen, Yunpeng and Zhou, Daquan},
  title     = {Enhancing Spatial Understanding in Image Generation via Reward Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27249--27259},
}
Towards Efficient Medical Reasoning with Minimal Fine-Tuning Data: Xinlin Zhuang,

Feilong Tang,

Haolin Yang,

Xiwei Liu,

Ming Hu,

Huifa Li,

Haochen Xue,

Junjun He,

Zongyuan Ge,

Yichen Li,

Ying Qian,

Imran Razzak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuang_2026_CVPR,
  author    = {Zhuang, Xinlin and Tang, Feilong and Yang, Haolin and Liu, Xiwei and Hu, Ming and Li, Huifa and Xue, Haochen and He, Junjun and Ge, Zongyuan and Li, Yichen and Qian, Ying and Razzak, Imran},
  title     = {Towards Efficient Medical Reasoning with Minimal Fine-Tuning Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20223--20232},
}
The SA-FARI Dataset: Segment Anything in Footage of Animals for Recognition and Identification: Dante Wasmuht,

Otto Brookes,

Maximilian Schall,

Pablo Palencia,

Christopher Beirne,

Tilo Burghardt,

Majid Mirmehdi,

Hjalmar Kühl,

Mimi Arandjelovic,

Sam Pottie,

Peter Bermant,

Brandon Asheim,

Yi Jin Toh,

Adam Elzinga,

Jason Allan Holmberg,

Andrew Whitworth,

Eleanor Flatt,

Laura Gustafson,

Chaitanya Ryali,

Yuan-Ting Hu,

Baishan Guo,

Andrew Westbury,

Kate Saenko,

Didac Suris; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wasmuht_2026_CVPR,
  author    = {Wasmuht, Dante and Brookes, Otto and Schall, Maximilian and Palencia, Pablo and Beirne, Christopher and Burghardt, Tilo and Mirmehdi, Majid and K{\"u}hl, Hjalmar and Arandjelovic, Mimi and Pottie, Sam and Bermant, Peter and Asheim, Brandon and Toh, Yi Jin and Elzinga, Adam and Holmberg, Jason Allan and Whitworth, Andrew and Flatt, Eleanor and Gustafson, Laura and Ryali, Chaitanya and Hu, Yuan-Ting and Guo, Baishan and Westbury, Andrew and Saenko, Kate and Suris, Didac},
  title     = {The {SA-FARI} Dataset: Segment Anything in Footage of Animals for Recognition and Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21679--21689},
}
Flow4DGS-SLAM: Optical Flow-Guided 4D Gaussian Splatting SLAM: Yunsong Wang,

Gim Hee Lee; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yunsong and Lee, Gim Hee},
  title     = {{Flow4DGS-SLAM}: Optical Flow-Guided {4D} {Gaussian} Splatting {SLAM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33364--33373},
}
Adaptive Anisotropic Gaussian Splatting for Multi-contrast MRI Arbitrary-Scale Super-Resolution with Anatomy Guidance: Qiuhai Yan,

Kang Chen,

Zhengjie Lu,

Tingting Wang,

Faming Fang,

Guixu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Qiuhai and Chen, Kang and Lu, Zhengjie and Wang, Tingting and Fang, Faming and Zhang, Guixu},
  title     = {Adaptive Anisotropic {Gaussian} Splatting for Multi-contrast {MRI} Arbitrary-Scale Super-Resolution with Anatomy Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2188--2197},
}
Perceptual 3D Simulation With Physical World Modeling: Wanhee Lee,

Klemen Kotar,

Rahul Mysore Venkatesh,

Jared Watrous,

Daniel LK Yamins; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Wanhee and Kotar, Klemen and Venkatesh, Rahul Mysore and Watrous, Jared and Yamins, Daniel LK},
  title     = {Perceptual {3D} Simulation With Physical World Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27219--27228},
}
Intervention-Aware Multiscale Representation Learning from Imaging Phenomics and Perturbation Transcriptomics: Jiayuan Chen,

Ruoqi Liu,

Zishan Gu,

Ping Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jiayuan and Liu, Ruoqi and Gu, Zishan and Zhang, Ping},
  title     = {Intervention-Aware Multiscale Representation Learning from Imaging Phenomics and Perturbation Transcriptomics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41826--41835},
}
PMRNet: Physics-informed Multi-scale Refinement Network for Medical Image Segmentation: Boce Kang; [pdf]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Boce},
  title     = {{PMRNet}: Physics-informed Multi-scale Refinement Network for Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15659--15668},
}
HalluGen: Synthesizing Realistic and Controllable Hallucinations for Evaluating Image Restoration: Seunghoi Kim,

Henry F. J. Tregidgo,

Chen Jin,

Matteo Figini,

Daniel C. Alexander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Seunghoi and Tregidgo, Henry F. J. and Jin, Chen and Figini, Matteo and Alexander, Daniel C.},
  title     = {{HalluGen}: Synthesizing Realistic and Controllable Hallucinations for Evaluating Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32550--32560},
}
NeuroSeg Meets DINOv3: Transferring 2D Self-Supervised Visual Priors to 3D Neuron Segmentation via DINOv3 Initialization: Yik San Cheng,

Runkai Zhao,

Weidong Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Yik San and Zhao, Runkai and Cai, Weidong},
  title     = {{NeuroSeg} Meets {DINOv3}: Transferring {2D} Self-Supervised Visual Priors to {3D} Neuron Segmentation via {DINOv3} Initialization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30053--30064},
}
TESSERA: Temporal Embeddings of Surface Spectra for Earth Representation and Analysis: Zhengpeng Feng,

Clement Atzberger,

Sadiq Jaffer,

Jovana Knezevic,

Silja Sormunen,

Robin Young,

Madeline C. Lisaius,

Markus Immitzer,

Toby Jackson,

James Ball,

David A. Coomes,

Anil Madhavapeddy,

Andrew Blake,

Srinivasan Keshav; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Zhengpeng and Atzberger, Clement and Jaffer, Sadiq and Knezevic, Jovana and Sormunen, Silja and Young, Robin and Lisaius, Madeline C. and Immitzer, Markus and Jackson, Toby and Ball, James and Coomes, David A. and Madhavapeddy, Anil and Blake, Andrew and Keshav, Srinivasan},
  title     = {{TESSERA}: Temporal Embeddings of Surface Spectra for {Earth} Representation and Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34818--34831},
}
FlexAvatar: Flexible Large Reconstruction Model for Animatable Gaussian Head Avatars with Detailed Deformation: Cheng Peng,

Zhuo Su,

Liao Wang,

Chen Guo,

Zhaohu Li,

Chengjiang Long,

Zheng Lv,

Jingxiang Sun,

Chenyangguang Zhang,

Yebin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Cheng and Su, Zhuo and Wang, Liao and Guo, Chen and Li, Zhaohu and Long, Chengjiang and Lv, Zheng and Sun, Jingxiang and Zhang, Chenyangguang and Liu, Yebin},
  title     = {{FlexAvatar}: Flexible Large Reconstruction Model for Animatable {Gaussian} Head Avatars with Detailed Deformation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18229--18240},
}
DualSplat: Robust 3D Gaussian Splatting via Pseudo-Mask Bootstrapping from Reconstruction Failures: Xu Wang,

Zhiru Wang,

Shiyun Xie,

Chengwei Pan,

Yisong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xu and Wang, Zhiru and Xie, Shiyun and Pan, Chengwei and Chen, Yisong},
  title     = {{DualSplat}: Robust {3D} {Gaussian} Splatting via Pseudo-Mask Bootstrapping from Reconstruction Failures},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4912--4921},
}
Bi-directional Autoregressive Diffusion for Large Complex Motion Interpolation: Yongrui Ma,

Shijie Zhao,

Mingde Yao,

Junlin Li,

Li Zhang,

Xiaohong Liu,

Qi Dou,

Jinwei Gu,

Tianfan Xue; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Yongrui and Zhao, Shijie and Yao, Mingde and Li, Junlin and Zhang, Li and Liu, Xiaohong and Dou, Qi and Gu, Jinwei and Xue, Tianfan},
  title     = {Bi-directional Autoregressive Diffusion for Large Complex Motion Interpolation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35779--35788},
}
INSID3: Training-Free In-Context Segmentation with DINOv3: Claudia Cuttano,

Gabriele Trivigno,

Christoph Reich,

Daniel Cremers,

Carlo Masone,

Stefan Roth; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cuttano_2026_CVPR,
  author    = {Cuttano, Claudia and Trivigno, Gabriele and Reich, Christoph and Cremers, Daniel and Masone, Carlo and Roth, Stefan},
  title     = {{INSID3}: Training-Free In-Context Segmentation with {DINOv3}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21638--21648},
}
Towards Realistic and Consistent Orbital Video Generation via 3D Foundation Priors: Rong Wang,

Ruyi Zha,

Ziang Cheng,

Jiayu Yang,

Pulak Purkait,

Hongdong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Rong and Zha, Ruyi and Cheng, Ziang and Yang, Jiayu and Purkait, Pulak and Li, Hongdong},
  title     = {Towards Realistic and Consistent Orbital Video Generation via {3D} Foundation Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18407--18417},
}
Relightful Video Portrait Harmonization: Jun Myeong Choi,

Jae Shin Yoon,

Luchao Qi,

Roni Sengupta,

Joon-Young Lee; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Jun Myeong and Yoon, Jae Shin and Qi, Luchao and Sengupta, Roni and Lee, Joon-Young},
  title     = {Relightful Video Portrait Harmonization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23356--23366},
}
ArtPro: Self-Supervised Articulated Object Reconstruction with Adaptive Integration of Mobility Proposals: Xuelu Li,

Zhaonan Wang,

Xiaogang Wang,

Lei Wu,

Manyi Li,

Changhe Tu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xuelu and Wang, Zhaonan and Wang, Xiaogang and Wu, Lei and Li, Manyi and Tu, Changhe},
  title     = {{ArtPro}: Self-Supervised Articulated Object Reconstruction with Adaptive Integration of Mobility Proposals},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13897--13907},
}
MOS: Mitigating Optical-SAR Modality Gap for Cross-Modal Ship Re-Identification: Yujian Zhao,

Hankun Liu,

Guanglin Niu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yujian and Liu, Hankun and Niu, Guanglin},
  title     = {{MOS}: Mitigating Optical-{SAR} Modality Gap for Cross-Modal Ship Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30335--30345},
}
Latent Chain-of-Thought World Modeling for End-to-End Autonomous Driving: Shuhan Tan,

Kashyap Chitta,

Yuxiao Chen,

Ran Tian,

Yurong You,

Yan Wang,

Wenjie Luo,

Yulong Cao,

Philipp Krähenbühl,

Marco Pavone,

Boris Ivanovic; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Shuhan and Chitta, Kashyap and Chen, Yuxiao and Tian, Ran and You, Yurong and Wang, Yan and Luo, Wenjie and Cao, Yulong and Kr{\"a}henb{\"u}hl, Philipp and Pavone, Marco and Ivanovic, Boris},
  title     = {Latent Chain-of-Thought World Modeling for End-to-End Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39724--39733},
}
Enabling Supervised Learning of Generative Signatures for Generalized AI-Generated Images Detection: Jianwei Fei,

Yunshu Dai,

Xiaoyu Zhou,

Zhihua Xia,

Alessandro Piva; [pdf]
[bibtex]
@InProceedings{Fei_2026_CVPR,
  author    = {Fei, Jianwei and Dai, Yunshu and Zhou, Xiaoyu and Xia, Zhihua and Piva, Alessandro},
  title     = {Enabling Supervised Learning of Generative Signatures for Generalized {AI}-Generated Images Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14084--14094},
}
BDNet: Bio-Inspired Dual-Backbone Small Object Detection Network: Wenchao Guan,

Chuan Lin,

Sihan Huang,

Xiongzhen Wang,

Xintao Pang; [pdf] [supp]
[bibtex]
@InProceedings{Guan_2026_CVPR,
  author    = {Guan, Wenchao and Lin, Chuan and Huang, Sihan and Wang, Xiongzhen and Pang, Xintao},
  title     = {{BDNet}: Bio-Inspired Dual-Backbone Small Object Detection Network},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32724--32734},
}
RoboTAG: End-to-end Robot Pose Estimation via Topological Alignment Graph: Yifan Liu,

Fangneng Zhan,

Wanhua Li,

Haowen Sun,

Katerina Fragkiadaki,

Hanspeter Pfister; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yifan and Zhan, Fangneng and Li, Wanhua and Sun, Haowen and Fragkiadaki, Katerina and Pfister, Hanspeter},
  title     = {{RoboTAG}: End-to-end Robot Pose Estimation via Topological Alignment Graph},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35146--35155},
}
Incentivizing Versatile Video Reasoning in MLLMs via Data-Efficient Reinforcement Learning: Xiaodong Wang,

Zhirong Wu,

Langling Huang,

Yuxi Zheng,

Peixi Peng; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xiaodong and Wu, Zhirong and Huang, Langling and Zheng, Yuxi and Peng, Peixi},
  title     = {Incentivizing Versatile Video Reasoning in {MLLMs} via Data-Efficient Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5444--5454},
}
SegQuant: A Semantics-Aware and Generalizable Quantization Framework for Diffusion Models: Jiaji Zhang,

Ruichao Sun,

Hailiang Zhao,

Jiaju Wu,

Peng Chen,

Hao Li,

Yuying Liu,

Kingsum Chow,

Gang Xiong,

Shuiguang Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jiaji and Sun, Ruichao and Zhao, Hailiang and Wu, Jiaju and Chen, Peng and Li, Hao and Liu, Yuying and Chow, Kingsum and Xiong, Gang and Deng, Shuiguang},
  title     = {{SegQuant}: A Semantics-Aware and Generalizable Quantization Framework for Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43717--43726},
}
GEM: Generating LiDAR World Model via Deformable Mamba: Yang Wu,

Zhaojiang Liu,

Qiang Meng,

Youquan Liu,

Renliang Weng,

Jianjun Qian,

Jian Yang,

Jin Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yang and Liu, Zhaojiang and Meng, Qiang and Liu, Youquan and Weng, Renliang and Qian, Jianjun and Yang, Jian and Xie, Jin},
  title     = {{GEM}: Generating {LiDAR} World Model via Deformable {Mamba}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24227--24236},
}
Efficient All-Pairs Correlation Volume Sampling for Optical Flow Estimation: Karlis Martins Briedis (Studios, ETH Zurich; ORCID 0000-0003-4012-6292),

Markus Gross (Studios, ETH Zurich; ORCID 0009-0003-9324-779X),

Christopher Schroers (Studios; ORCID 0000-0003-1473-1878); [pdf] [supp]
[bibtex]
@InProceedings{Briedis_2026_CVPR,
  author    = {Briedis, Karlis Martins and Gross, Markus and Schroers, Christopher},
  title     = {Efficient All-Pairs Correlation Volume Sampling for Optical Flow Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5700--5709},
}
F^2HDR: Two-Stage HDR Video Reconstruction via Flow Adapter and Physical Motion Modeling: Huanjing Yue,

Dawei Li,

Shaoxiong Tu,

Jingyu Yang; [pdf] [supp]
[bibtex]
@InProceedings{Yue_2026_CVPR,
  author    = {Yue, Huanjing and Li, Dawei and Tu, Shaoxiong and Yang, Jingyu},
  title     = {{F$^2$HDR}: Two-Stage {HDR} Video Reconstruction via Flow Adapter and Physical Motion Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33985--33994},
}
Multimodal Continual Instruction Tuning with Dynamic Gradient Guidance: Songze Li,

Mingyu Gao,

Tonghua Su,

Xu-Yao Zhang,

Zhongjie Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Songze and Gao, Mingyu and Su, Tonghua and Zhang, Xu-Yao and Wang, Zhongjie},
  title     = {Multimodal Continual Instruction Tuning with Dynamic Gradient Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10820--10829},
}
AnyID: Ultra-Fidelity Universal Identity-Preserving Video Generation from Any Visual References: Jiahao Wang,

Hualian Sheng,

Sijia Cai,

Yuxiao Yang,

Weizhan Zhang,

Caixia Yan,

Bing Deng,

Jieping Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jiahao and Sheng, Hualian and Cai, Sijia and Yang, Yuxiao and Zhang, Weizhan and Yan, Caixia and Deng, Bing and Ye, Jieping},
  title     = {{AnyID}: Ultra-Fidelity Universal Identity-Preserving Video Generation from Any Visual References},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12808--12817},
}
ResAD: Normalized Residual Trajectory Modeling for End-to-End Autonomous Driving: Zhiyu Zheng,

Shaoyu Chen,

Haoran Yin,

Xinbang Zhang,

Jialv Zou,

Xinggang Wang,

Qian Zhang,

Lefei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Zhiyu and Chen, Shaoyu and Yin, Haoran and Zhang, Xinbang and Zou, Jialv and Wang, Xinggang and Zhang, Qian and Zhang, Lefei},
  title     = {{ResAD}: Normalized Residual Trajectory Modeling for End-to-End Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3729--3739},
}
Featurising Pixels from Dynamic 3D Scenes with Linear In-Context Learners: Nikita Araslanov,

Martin Sundermeyer,

Hidenobu Matsuki,

David Joseph Tan,

Federico Tombari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Araslanov_2026_CVPR,
  author    = {Araslanov, Nikita and Sundermeyer, Martin and Matsuki, Hidenobu and Tan, David Joseph and Tombari, Federico},
  title     = {Featurising Pixels from Dynamic {3D} Scenes with Linear In-Context Learners},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21443--21452},
}
FoSS: Modeling Long-Range Dependencies and Multimodal Uncertainty in Trajectory Prediction via Fourier-State Space Integration: Yizhou Huang,

Genze Jiang,

Yihua Cheng,

Kezhi Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Yizhou and Jiang, Genze and Cheng, Yihua and Wang, Kezhi},
  title     = {{FoSS}: Modeling Long-Range Dependencies and Multimodal Uncertainty in Trajectory Prediction via {Fourier}-State Space Integration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3750--3760},
}
Hi-Lo Prune: Look at What You'll Lose before Pruning with Hierarchical Token Selection: Zixun Sun,

Yubo Dong,

Hehe Fan,

Yi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Zixun and Dong, Yubo and Fan, Hehe and Yang, Yi},
  title     = {{Hi-Lo Prune}: Look at What You'll Lose before Pruning with Hierarchical Token Selection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31941--31951},
}
ParallelVLM: Lossless Video-LLM Acceleration with Visual Alignment Aware Parallel Speculative Decoding: Quan Kong,

Yuhao Shen,

Yicheng Ji,

Huan Li,

Cong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Quan and Shen, Yuhao and Ji, Yicheng and Li, Huan and Wang, Cong},
  title     = {{ParallelVLM}: Lossless Video-{LLM} Acceleration with Visual Alignment Aware Parallel Speculative Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11392--11402},
}
CRIT: Graph-Based Automatic Data Synthesis to Enhance Cross-Modal Multi-Hop Reasoning: Junyoung Sung,

Seungwoo Lyu,

Minjun Kim,

Sumin An,

Arsha Nagrani,

Paul Hongsuck Seo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sung_2026_CVPR,
  author    = {Sung, Junyoung and Lyu, Seungwoo and Kim, Minjun and An, Sumin and Nagrani, Arsha and Seo, Paul Hongsuck},
  title     = {{CRIT}: Graph-Based Automatic Data Synthesis to Enhance Cross-Modal Multi-Hop Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19143--19154},
}
Rethinking Knowledge Transfer in Image Quality Assessment: A Perceptual Preference Structure Alignment Perspective: Aobo Li,

Jinjian Wu,

Yongxu Liu,

Jupo Ma,

Weisheng Dong; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Aobo and Wu, Jinjian and Liu, Yongxu and Ma, Jupo and Dong, Weisheng},
  title     = {Rethinking Knowledge Transfer in Image Quality Assessment: A Perceptual Preference Structure Alignment Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1310--1319},
}
EMO-R3: Reflective Reinforcement Learning for Emotional Reasoning in Multimodal Large Language Models: Yiyang Fang,

Wenke Huang,

Pei Fu,

Yihao Yang,

Kehua Su,

Zhenbo Luo,

Jian Luan,

Mang Ye; [pdf] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Yiyang and Huang, Wenke and Fu, Pei and Yang, Yihao and Su, Kehua and Luo, Zhenbo and Luan, Jian and Ye, Mang},
  title     = {{EMO-R3}: Reflective Reinforcement Learning for Emotional Reasoning in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {745--755},
}
RNN as Linear Transformer: A Closer Investigation into Representational Potentials of Visual Mamba Models: Timing Yang,

Feng Wang,

Guoyizhe Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Timing and Wang, Feng and Wei, Guoyizhe},
  title     = {{RNN} as Linear Transformer: A Closer Investigation into Representational Potentials of Visual {Mamba} Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27398--27408},
}
Illumination-Consistent Human-Scene Reconstruction from Monocular Video: Rongbin Zheng,

Wensheng Li,

Lingzhe Zeng,

Dong Wang,

Chengying Gao; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Rongbin and Li, Wensheng and Zeng, Lingzhe and Wang, Dong and Gao, Chengying},
  title     = {Illumination-Consistent Human-Scene Reconstruction from Monocular Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14050--14061},
}
VideoWorld 2: Learning Transferable Knowledge from Real-world Videos: Zhongwei Ren,

Yunchao Wei,

Xiao Yu,

Guixun Luo,

Yao Zhao,

Bingyi Kang,

Jiashi Feng,

Xiaojie Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Zhongwei and Wei, Yunchao and Yu, Xiao and Luo, Guixun and Zhao, Yao and Kang, Bingyi and Feng, Jiashi and Jin, Xiaojie},
  title     = {{VideoWorld 2}: Learning Transferable Knowledge from Real-world Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40569--40580},
}
MixFlow Training: Alleviating Exposure Bias with Slowed Interpolation Mixture: Hui Li,

Jiayue Lyu,

Fu-Yun Wang,

Kaihui Cheng,

Siyu Zhu,

Jingdong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Hui and Lyu, Jiayue and Wang, Fu-Yun and Cheng, Kaihui and Zhu, Siyu and Wang, Jingdong},
  title     = {{MixFlow} Training: Alleviating Exposure Bias with Slowed Interpolation Mixture},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9095--9105},
}
RDFace: A Benchmark Dataset for Rare Disease Facial Image Analysis under Extreme Data Scarcity and Phenotype-Aware Synthetic Generation: Ganlin Feng,

Yuxi Long,

Hafsa Ali,

Erin Lou,

Fahad Butt,

Qian Liu,

Yang Wang,

Pingzhao Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Ganlin and Long, Yuxi and Ali, Hafsa and Lou, Erin and Butt, Fahad and Liu, Qian and Wang, Yang and Hu, Pingzhao},
  title     = {{RDFace}: A Benchmark Dataset for Rare Disease Facial Image Analysis under Extreme Data Scarcity and Phenotype-Aware Synthetic Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42976--42986},
}
TherA: Thermal-Aware Visual-Language Prompting for Controllable RGB-to-Thermal Infrared Translation: Dong-Guw Lee,

Tai Hyoung Rhee,

Hyunsoo Jang,

Young-Sik Shin,

Ukcheol Shin,

Ayoung Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Dong-Guw and Rhee, Tai Hyoung and Jang, Hyunsoo and Shin, Young-Sik and Shin, Ukcheol and Kim, Ayoung},
  title     = {{TherA}: Thermal-Aware Visual-Language Prompting for Controllable {RGB}-to-Thermal Infrared Translation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36803--36813},
}
TTP: Test-Time Padding for Adversarial Detection and Robust Adaptation on Vision-Language Models: Zhiwei Li,

Yitian Pang,

Weining Wang,

Zhenan Sun,

Qi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhiwei and Pang, Yitian and Wang, Weining and Sun, Zhenan and Li, Qi},
  title     = {{TTP}: Test-Time Padding for Adversarial Detection and Robust Adaptation on Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1513--1522},
}
DP-FedAdamW: An Efficient Optimizer for Differentially Private Federated Large Models: Jin Liu,

Ning Xi,

Yinbin Miao,

Junkang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jin and Xi, Ning and Miao, Yinbin and Liu, Junkang},
  title     = {{DP-FedAdamW}: An Efficient Optimizer for Differentially Private Federated Large Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3358--3368},
}
AMB3R: Accurate Feed-forward Metric-scale 3D Reconstruction with Backend: Hengyi Wang,

Lourdes Agapito; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Hengyi and Agapito, Lourdes},
  title     = {{AMB3R}: Accurate Feed-forward Metric-scale {3D} Reconstruction with Backend},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14612--14625},
}
MLLM-HWSI: A Multimodal Large Language Model for Hierarchical Whole Slide Image Understanding: Basit Alawode,

Arif Mahmood,

Muaz Khalifa Al Radi,

Shahad Albastaki,

Asim Khan,

Muhammad Bilal,

Moshira Ali Abdalla,

Mohammed Bennamoun,

Sajid Javed; [pdf] [supp]
[bibtex]
@InProceedings{Alawode_2026_CVPR,
  author    = {Alawode, Basit and Mahmood, Arif and Al Radi, Muaz Khalifa and Albastaki, Shahad and Khan, Asim and Bilal, Muhammad and Abdalla, Moshira Ali and Bennamoun, Mohammed and Javed, Sajid},
  title     = {{MLLM-HWSI}: A Multimodal Large Language Model for Hierarchical Whole Slide Image Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13732--13743},
}
PatchScene: Patch-based Voxel Diffusion Model for Large-Scale Scene Completion: Qingdong Xu,

Jiajun Zhu,

Shilin Zhu,

Xinjing He,

Chao Lu,

Huanran Wang,

Jiyao Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Qingdong and Zhu, Jiajun and Zhu, Shilin and He, Xinjing and Lu, Chao and Wang, Huanran and Zhang, Jiyao},
  title     = {{PatchScene}: Patch-based Voxel Diffusion Model for Large-Scale Scene Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16499--16508},
}
Through the Frequency Lens: Cross-Domain Generalisable Gaze Estimation with Adaptive Modulation: Yang Xu,

Yiwei Bao,

Feng Lu; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yang and Bao, Yiwei and Lu, Feng},
  title     = {Through the Frequency Lens: Cross-Domain Generalisable Gaze Estimation with Adaptive Modulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42858--42868},
}
PosterIQ: A Design Perspective Benchmark for Poster Understanding and Generation: Yuheng Feng,

Wen Zhang,

Haodong Duan,

Xingxing Zou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Yuheng and Zhang, Wen and Duan, Haodong and Zou, Xingxing},
  title     = {{PosterIQ}: A Design Perspective Benchmark for Poster Understanding and Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29295--29304},
}
Rethinking 2D-3D Registration: A Novel Network for High-Value Zone Selection and Representation Consistency Alignment: Zhixin Cheng,

Bohao Liao,

Jiacheng Deng,

Xiaotian Yin,

Xinjun Li,

Yujia Chen,

Baoqun Yin,

Tianzhu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Zhixin and Liao, Bohao and Deng, Jiacheng and Yin, Xiaotian and Li, Xinjun and Chen, Yujia and Yin, Baoqun and Zhang, Tianzhu},
  title     = {Rethinking {2D-3D} Registration: A Novel Network for High-Value Zone Selection and Representation Consistency Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39052--39063},
}
Fuel Gauge: Estimating Chain-of-Thought Length Ahead of Time in Large Multimodal Models: Yuedong Yang,

Xiwen Wei,

Mustafa Munir,

Radu Marculescu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yuedong and Wei, Xiwen and Munir, Mustafa and Marculescu, Radu},
  title     = {Fuel Gauge: Estimating Chain-of-Thought Length Ahead of Time in Large Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33436--33445},
}
FB-CLIP: Fine-Grained Zero-Shot Anomaly Detection with Foreground-Background Disentanglement: Ming Hu,

Yongsheng Huo,

Mingyu Dou,

Jianfu Yin,

Peng Zhao,

Yao Wang,

Cong Hu,

Bingliang Hu,

Quan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Ming and Huo, Yongsheng and Dou, Mingyu and Yin, Jianfu and Zhao, Peng and Wang, Yao and Hu, Cong and Hu, Bingliang and Wang, Quan},
  title     = {{FB-CLIP}: Fine-Grained Zero-Shot Anomaly Detection with Foreground-Background Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35659--35669},
}
MM-ReCoder: Advancing Chart-to-Code Generation with Reinforcement Learning and Self-Correction: Zitian Tang,

Xu Zhang,

Jianbo Yuan,

Yang Zou,

Varad Gunjal,

Songyao Jiang,

Davide Modolo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Zitian and Zhang, Xu and Yuan, Jianbo and Zou, Yang and Gunjal, Varad and Jiang, Songyao and Modolo, Davide},
  title     = {{MM-ReCoder}: Advancing Chart-to-Code Generation with Reinforcement Learning and Self-Correction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22164--22173},
}
HATS: Hardness-Aware Trajectory Synthesis for GUI Agents: Rui Shao,

Ruize Gao,

Bin Xie,

Yixing Li,

Kaiwen Zhou,

Shuai Wang,

Weili Guan,

Gongwei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Rui and Gao, Ruize and Xie, Bin and Li, Yixing and Zhou, Kaiwen and Wang, Shuai and Guan, Weili and Chen, Gongwei},
  title     = {{HATS}: Hardness-Aware Trajectory Synthesis for {GUI} Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27471--27481},
}
Frequency-Aware Flow Matching for High-Quality Image Generation: Sucheng Ren,

Qihang Yu,

Ju He,

Xiaohui Shen,

Liang-Chieh Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Sucheng and Yu, Qihang and He, Ju and Shen, Xiaohui and Chen, Liang-Chieh},
  title     = {Frequency-Aware Flow Matching for High-Quality Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9074--9083},
}
DINO Eats CLIP: Adapting Beyond Knowns for Open-set 3D Object Retrieval: Xinwei He,

Yansong Zheng,

Qianru Han,

Zhichuan Wang,

Yuxuan Cai,

Yang Zhou,

Jingbo Xia,

Yulong Wang,

Jinhai Xiang,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Xinwei and Zheng, Yansong and Han, Qianru and Wang, Zhichuan and Cai, Yuxuan and Zhou, Yang and Xia, Jingbo and Wang, Yulong and Xiang, Jinhai and Bai, Xiang},
  title     = {{DINO} Eats {CLIP}: Adapting Beyond Knowns for Open-set {3D} Object Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34704--34713},
}
SwiftTailor: Efficient 3D Garment Generation with Geometry Image Representation: Phuc Pham,

Uy Dieu Tran,

Binh-Son Hua,

Phong Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pham_2026_CVPR,
  author    = {Pham, Phuc and Tran, Uy Dieu and Hua, Binh-Son and Nguyen, Phong},
  title     = {{SwiftTailor}: Efficient {3D} Garment Generation with Geometry Image Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27169--27178},
}
MedLIME: A Distribution-Aligned and Evidence-Supported Framework for Medical Saliency Explanations: Raghav Magazine,

Xingjian Li,

Min Xu; [pdf] [supp]
[bibtex]
@InProceedings{Magazine_2026_CVPR,
  author    = {Magazine, Raghav and Li, Xingjian and Xu, Min},
  title     = {{MedLIME}: A Distribution-Aligned and Evidence-Supported Framework for Medical Saliency Explanations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38926--38935},
}
SAGE: Style-Adaptive Generalization for Privacy-Constrained Semantic Segmentation Across Domains: Qingmei Li,

Yang Zhang,

Peifeng Zhang,

Haohuan Fu,

Juepeng Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Qingmei and Zhang, Yang and Zhang, Peifeng and Fu, Haohuan and Zheng, Juepeng},
  title     = {{SAGE}: Style-Adaptive Generalization for Privacy-Constrained Semantic Segmentation Across Domains},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13134--13144},
}
Rethinking Dataset Distillation: Hard Truths about Soft Labels: Priyam Dey,

Aditya Sahdev,

Sunny Bhati,

Konda Reddy Mopuri,

Venkatesh Babu Radhakrishnan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dey_2026_CVPR,
  author    = {Dey, Priyam and Sahdev, Aditya and Bhati, Sunny and Mopuri, Konda Reddy and Radhakrishnan, Venkatesh Babu},
  title     = {Rethinking Dataset Distillation: Hard Truths about Soft Labels},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {178--187},
}
LIFT and PLACE: A Simple, Stable, and Effective Knowledge Distillation Framework for Lightweight Diffusion Models: Hyunsoo Han,

Sangyeop Yeo,

Jaejun Yoo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Hyunsoo and Yeo, Sangyeop and Yoo, Jaejun},
  title     = {{LIFT} and {PLACE}: A Simple, Stable, and Effective Knowledge Distillation Framework for Lightweight Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5564--5573},
}
EditCtrl: Disentangled Local and Global Control for Real-Time Generative Video Editing: Yehonathan Litman,

Shikun Liu,

Dario Seyb,

Nicholas Milef,

Yang Zhou,

Carl Marshall,

Shubham Tulsiani,

Caleb Leak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Litman_2026_CVPR,
  author    = {Litman, Yehonathan and Liu, Shikun and Seyb, Dario and Milef, Nicholas and Zhou, Yang and Marshall, Carl and Tulsiani, Shubham and Leak, Caleb},
  title     = {{EditCtrl}: Disentangled Local and Global Control for Real-Time Generative Video Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8965--8975},
}
BiFM: Bidirectional Flow Matching for Few-Step Image Editing and Generation: Yasong Dai,

Zeeshan Hayder,

David Ahmedt-Aristizabal,

Hongdong Li; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Yasong and Hayder, Zeeshan and Ahmedt-Aristizabal, David and Li, Hongdong},
  title     = {{BiFM}: Bidirectional Flow Matching for Few-Step Image Editing and Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23325--23334},
}
Diffusion with a Linguistic Compass: Steering the Generation of Clinically Plausible Future sMRI Representations for Early MCI Conversion Prediction: Zhihao Tang,

Chaozhuo Li,

Litian Zhang,

Xi Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Zhihao and Li, Chaozhuo and Zhang, Litian and Zhang, Xi},
  title     = {Diffusion with a Linguistic Compass: Steering the Generation of Clinically Plausible Future {sMRI} Representations for Early {MCI} Conversion Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42691--42700},
}
Vector Prism: Animating Vector Graphics by Stratifying Semantic Structure: Jooyeol Yun,

Jaegul Choo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yun_2026_CVPR,
  author    = {Yun, Jooyeol and Choo, Jaegul},
  title     = {Vector Prism: Animating Vector Graphics by Stratifying Semantic Structure},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17316--17325},
}
CIGPose: Causal Intervention Graph Neural Network for Whole-Body Pose Estimation: Bohao Li,

Zhicheng Cao,

Huixian Li,

Yangming Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Bohao and Cao, Zhicheng and Li, Huixian and Guo, Yangming},
  title     = {{CIGPose}: Causal Intervention Graph Neural Network for Whole-Body Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23484--23494},
}
Lite Any Stereo: Efficient Zero-Shot Stereo Matching: Junpeng Jing,

Weixun Luo,

Ye Mao,

Krystian Mikolajczyk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jing_2026_CVPR,
  author    = {Jing, Junpeng and Luo, Weixun and Mao, Ye and Mikolajczyk, Krystian},
  title     = {Lite Any Stereo: Efficient Zero-Shot Stereo Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21725--21735},
}
Adaptive Bayesian Early-Exit Networks for Efficient Non-Transferable Learning: Siyu Luan,

Yan Li,

Zhong Chen,

Zhenyi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Luan_2026_CVPR,
  author    = {Luan, Siyu and Li, Yan and Chen, Zhong and Wang, Zhenyi},
  title     = {Adaptive {Bayesian} Early-Exit Networks for Efficient Non-Transferable Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24514--24523},
}
MoLingo: Motion-Language Alignment for Text-to-Human Motion Generation: Yannan He,

Garvita Tiwari,

Xiaohan Zhang,

Pankaj Bora,

Tolga Birdal,

Jan Eric Lenssen,

Gerard Pons-Moll; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yannan and Tiwari, Garvita and Zhang, Xiaohan and Bora, Pankaj and Birdal, Tolga and Lenssen, Jan Eric and Pons-Moll, Gerard},
  title     = {{MoLingo}: Motion-Language Alignment for Text-to-Human Motion Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38387--38398},
}
Beyond Objects: Contextual Synthetic Data Generation for Fine-Grained Classification: William Yang,

Xindi Wu,

Zhiwei Deng,

Esin Tureci,

Olga Russakovsky; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, William and Wu, Xindi and Deng, Zhiwei and Tureci, Esin and Russakovsky, Olga},
  title     = {Beyond Objects: Contextual Synthetic Data Generation for Fine-Grained Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22498--22508},
}
Masking Teacher and Reinforcing Student for Distilling Vision-Language Models: Byung-Kwan Lee,

Yu-Chiang Frank Wang,

Ryo Hachiuma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Byung-Kwan and Wang, Yu-Chiang Frank and Hachiuma, Ryo},
  title     = {Masking Teacher and Reinforcing Student for Distilling Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10126--10141},
}
DextER: Language-driven Dexterous Grasp Generation with Embodied Reasoning: Junha Lee,

Eunha Park,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Junha and Park, Eunha and Cho, Minsu},
  title     = {{DextER}: Language-driven Dexterous Grasp Generation with Embodied Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1067--1077},
}
Lighting in Motion: Spatiotemporal HDR Lighting Estimation: Christophe Bolduc,

Julien Philip,

Li Ma,

Mingming He,

Paul Debevec,

Jean-François Lalonde; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bolduc_2026_CVPR,
  author    = {Bolduc, Christophe and Philip, Julien and Ma, Li and He, Mingming and Debevec, Paul and Lalonde, Jean-Fran{\c{c}}ois},
  title     = {Lighting in Motion: Spatiotemporal {HDR} Lighting Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19696--19705},
}
Contact-Aware Neural Dynamics: Changwei Jing,

Jai Krishna Bandi,

Jianglong Ye,

Yan Duan,

Pieter Abbeel,

Xiaolong Wang,

Sha Yi; [pdf] [arXiv]
[bibtex]
@InProceedings{Jing_2026_CVPR,
  author    = {Jing, Changwei and Bandi, Jai Krishna and Ye, Jianglong and Duan, Yan and Abbeel, Pieter and Wang, Xiaolong and Yi, Sha},
  title     = {Contact-Aware Neural Dynamics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13442--13452},
}
VL-Eraser: Vacuum Distillation for Machine Unlearning in Vision-Language Models: Yili Wang,

Lu Dai,

Tairan Huang,

Yijie Xu,

Hui Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yili and Dai, Lu and Huang, Tairan and Xu, Yijie and Xiong, Hui},
  title     = {{VL-Eraser}: Vacuum Distillation for Machine Unlearning in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31504--31513},
}
CASR: A Robust Cyclic Framework for Arbitrary Large-Scale Super-Resolution with Distribution Alignment and Self-Similarity Awareness: Wenhao Guo,

Zhaoran Zhao,

Peng Lu,

Sheng Li,

Qian Qiao,

RuiDe Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Wenhao and Zhao, Zhaoran and Lu, Peng and Li, Sheng and Qiao, Qian and Li, RuiDe},
  title     = {{CASR}: A Robust Cyclic Framework for Arbitrary Large-Scale Super-Resolution with Distribution Alignment and Self-Similarity Awareness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2136--2145},
}
Bootstrap Your Own AV-Proxies: Adaptive Contrastive and Prototype Learning for Audio-Visual Segmentation: Junbo Zhang,

Hang Su,

Zhaofan Li,

Hang Dong,

Chao Sun; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Junbo and Su, Hang and Li, Zhaofan and Dong, Hang and Sun, Chao},
  title     = {Bootstrap Your Own {AV-Proxies}: Adaptive Contrastive and Prototype Learning for Audio-Visual Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23061--23071},
}
CoordSpeaker: Exploiting Gesture Captioning for Coordinated Caption-Empowered Co-Speech Gesture Generation: Fengyi Fang,

Sicheng Yang,

Wenming Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Fengyi and Yang, Sicheng and Yang, Wenming},
  title     = {{CoordSpeaker}: Exploiting Gesture Captioning for Coordinated Caption-Empowered Co-Speech Gesture Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30761--30771},
}
Self-Corrected Image Generation with Explainable Latent Rewards: Yinyi Luo,

Hrishikesh Gokhale,

Marios Savvides,

Jindong Wang,

Shengfeng He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Yinyi and Gokhale, Hrishikesh and Savvides, Marios and Wang, Jindong and He, Shengfeng},
  title     = {Self-Corrected Image Generation with Explainable Latent Rewards},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20088--20097},
}
FlashMotion: Few-Step Controllable Video Generation with Trajectory Guidance: Quanhao Li,

Zhen Xing,

Rui Wang,

Haidong Cao,

Qi Dai,

Daoguo Dong,

Zuxuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Quanhao and Xing, Zhen and Wang, Rui and Cao, Haidong and Dai, Qi and Dong, Daoguo and Wu, Zuxuan},
  title     = {{FlashMotion}: Few-Step Controllable Video Generation with Trajectory Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8986--8996},
}
Quantum-Gated Task-interaction Knowledge Distillation for Pre-trained Model-based Class-Incremental Learning: Linjie Li,

Huiyu Xiao,

Jiarui Cao,

Zhenyu Wu,

Yang Ji; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Linjie and Xiao, Huiyu and Cao, Jiarui and Wu, Zhenyu and Ji, Yang},
  title     = {Quantum-Gated Task-interaction Knowledge Distillation for Pre-trained Model-based Class-Incremental Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3920--3929},
}
Neu-PiG: Neural Preconditioned Grids for Fast Dynamic Surface Reconstruction on Long Sequences: Julian Kaltheuner,

Hannah Dröge,

Markus Plack,

Patrick Stotko,

Reinhard Klein; [pdf] [supp]
[bibtex]
@InProceedings{Kaltheuner_2026_CVPR,
  author    = {Kaltheuner, Julian and Dr{\"o}ge, Hannah and Plack, Markus and Stotko, Patrick and Klein, Reinhard},
  title     = {{Neu-PiG}: Neural Preconditioned Grids for Fast Dynamic Surface Reconstruction on Long Sequences},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36539--36549},
}
VecAttention: Vector-wise Sparse Attention for Accelerating Long Context Inference: Anmin Liu,

Ruixuan Yang,

Huiqiang Jiang,

Bin Lin,

Minmin Sun,

Yong Li,

Chen Zhang,

Tao Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Anmin and Yang, Ruixuan and Jiang, Huiqiang and Lin, Bin and Sun, Minmin and Li, Yong and Zhang, Chen and Xie, Tao},
  title     = {{VecAttention}: Vector-wise Sparse Attention for Accelerating Long Context Inference},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41299--41310},
}
GIFSplat: Generative Prior-Guided Iterative Feed-Forward 3D Gaussian Splatting from Sparse Views: Tianyu Chen,

Wei Xiang,

Kang Han,

Yu Lu,

Di Wu,

Gaowen Liu,

Ramana Rao Kompella; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Tianyu and Xiang, Wei and Han, Kang and Lu, Yu and Wu, Di and Liu, Gaowen and Kompella, Ramana Rao},
  title     = {{GIFSplat}: Generative Prior-Guided Iterative Feed-Forward {3D} {Gaussian} Splatting from Sparse Views},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26031--26040},
}
See It, Say It, Sorted: An Iterative Training-Free Framework for Visually-Grounded Multimodal Reasoning in LVLMs: Yongchang Zhang,

Oliver Ma,

Tianyi Liu,

Guangquan Zhou,

Yang Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yongchang and Ma, Oliver and Liu, Tianyi and Zhou, Guangquan and Chen, Yang},
  title     = {See It, Say It, Sorted: An Iterative Training-Free Framework for Visually-Grounded Multimodal Reasoning in {LVLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11933--11942},
}
VRCLIP: Multimodal Canonical Correlation Alignment for CLIP-Driven Vision-Radio Person Re-Identification: Rui Zhang,

Yaqi Wang,

Yadong Li,

Ruixu Geng,

Jianyang Wang,

Qijun Ying,

Dongheng Zhang,

Yang Hu,

Yan Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Rui and Wang, Yaqi and Li, Yadong and Geng, Ruixu and Wang, Jianyang and Ying, Qijun and Zhang, Dongheng and Hu, Yang and Chen, Yan},
  title     = {{VRCLIP}: Multimodal Canonical Correlation Alignment for {CLIP}-Driven Vision-Radio Person Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25536--25546},
}
Statistical Characteristic-Guided Denoising for Rapid High-Resolution Transmission Electron Microscopy Imaging: Hesong Li,

Ziqi Wu,

Ruiwen Shao,

Ying Fu; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Hesong and Wu, Ziqi and Shao, Ruiwen and Fu, Ying},
  title     = {Statistical Characteristic-Guided Denoising for Rapid High-Resolution Transmission Electron Microscopy Imaging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34050--34060},
}
HERBench: A Benchmark for Multi-Evidence Integration in Video Question Answering: Dan Ben Ami,

Gabriele Serussi,

Kobi Cohen,

Chaim Baskin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ben_Ami_2026_CVPR,
  author    = {Ben Ami, Dan and Serussi, Gabriele and Cohen, Kobi and Baskin, Chaim},
  title     = {{HERBench}: A Benchmark for Multi-Evidence Integration in Video Question Answering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4505--4514},
}
LAMP: Language-Assisted Motion Planning for Controllable Video Generation: Muhammed Burak Kizil,

Enes Sanli,

Niloy J. Mitra,

Erkut Erdem,

Aykut Erdem,

Duygu Ceylan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kizil_2026_CVPR,
  author    = {Kizil, Muhammed Burak and Sanli, Enes and Mitra, Niloy J. and Erdem, Erkut and Erdem, Aykut and Ceylan, Duygu},
  title     = {{LAMP}: Language-Assisted Motion Planning for Controllable Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12829--12838},
}
MMBench-GUI: A Unified Hierarchical Evaluation Framework for Multi-Platform GUI Agents: Xuehui Wang,

Zhenyu Wu,

JingJing Xie,

Zichen Ding,

Bowen Yang,

Zehao Li,

Zhaoyang Liu,

Qingyun Li,

Xuan Dong,

Zhe Chen,

Weiyun Wang,

Xiangyu Zhao,

Jixuan Chen,

Haodong Duan,

Tianbao Xie,

Chenyu Yang,

Shiqian Su,

Yue Yu,

Yanting Zhang,

Xiangyu Yue,

Weijie Su,

Xizhou Zhu,

Wei Shen,

Jifeng Dai,

Wenhai Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xuehui and Wu, Zhenyu and Xie, JingJing and Ding, Zichen and Yang, Bowen and Li, Zehao and Liu, Zhaoyang and Li, Qingyun and Dong, Xuan and Chen, Zhe and Wang, Weiyun and Zhao, Xiangyu and Chen, Jixuan and Duan, Haodong and Xie, Tianbao and Yang, Chenyu and Su, Shiqian and Yu, Yue and Zhang, Yanting and Yue, Xiangyu and Su, Weijie and Zhu, Xizhou and Shen, Wei and Dai, Jifeng and Wang, Wenhai},
  title     = {{MMBench-GUI}: A Unified Hierarchical Evaluation Framework for Multi-Platform {GUI} Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6239--6248},
}
Hermite Radial Basis Function for Surface Reconstruction via Differentiable Rendering: Hugo Blanc,

Jean-Emmanuel Deschaud,

Alexis Paljic; [pdf] [supp]
[bibtex]
@InProceedings{Blanc_2026_CVPR,
  author    = {Blanc, Hugo and Deschaud, Jean-Emmanuel and Paljic, Alexis},
  title     = {Hermite Radial Basis Function for Surface Reconstruction via Differentiable Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15376--15386},
}
GDRO: Group-level Reward Post-training Suitable for Diffusion Models: Yiyang Wang,

Xi Chen,

Xiaogang Xu,

Yu Liu,

Hengshuang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yiyang and Chen, Xi and Xu, Xiaogang and Liu, Yu and Zhao, Hengshuang},
  title     = {{GDRO}: Group-level Reward Post-training Suitable for Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43505--43513},
}
Learning 3D Representations for Spatial Intelligence from Unposed Multi-View Images: Bo Zhou,

Qiuxia Lai,

Zeren Sun,

Xiangbo Shu,

Yazhou Yao,

Wenguan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Bo and Lai, Qiuxia and Sun, Zeren and Shu, Xiangbo and Yao, Yazhou and Wang, Wenguan},
  title     = {Learning {3D} Representations for Spatial Intelligence from Unposed Multi-View Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22550--22560},
}
mmWaveFlow: Unified Enhancement and Generation of mmWave Human Point Clouds: Chang Su,

Beihong Jin,

Qiwen Shi,

Zhi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Su_2026_CVPR,
  author    = {Su, Chang and Jin, Beihong and Shi, Qiwen and Wang, Zhi},
  title     = {{mmWaveFlow}: Unified Enhancement and Generation of {mmWave} Human Point Clouds},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31366--31376},
}
Occlusion-Aware SORT: Observing Occlusion for Robust Multi-Object Tracking: Chunjiang Li,

Jianbo Ma,

Li Shen,

Yanru Chen,

Liangyin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Chunjiang and Ma, Jianbo and Shen, Li and Chen, Yanru and Chen, Liangyin},
  title     = {Occlusion-Aware {SORT}: Observing Occlusion for Robust Multi-Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42560--42570},
}
V-DPM: 4D Video Reconstruction with Dynamic Point Maps: Edgar Sucar,

Eldar Insafutdinov,

Zihang Lai,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sucar_2026_CVPR,
  author    = {Sucar, Edgar and Insafutdinov, Eldar and Lai, Zihang and Vedaldi, Andrea},
  title     = {{V-DPM}: {4D} Video Reconstruction with Dynamic Point Maps},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14502--14511},
}
DETACH : Decomposed Spatio-Temporal Alignment for Exocentric Video and Ambient Sensors with Staged Learning: Junho Yoon,

Jaemo Jeong,

Hyunju Kim,

Dongman Lee; [pdf] [supp]
[bibtex]
@InProceedings{Yoon_2026_CVPR,
  author    = {Yoon, Junho and Jeong, Jaemo and Kim, Hyunju and Lee, Dongman},
  title     = {{DETACH} : Decomposed Spatio-Temporal Alignment for Exocentric Video and Ambient Sensors with Staged Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12860--12870},
}
TIACam: Text-Anchored Invariant Feature Learning with Auto-Augmentation for Camera-Robust Zero-Watermarking: Abdullah Tanvir,

Agnibh Dasgupta,

Xin Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tanvir_2026_CVPR,
  author    = {Tanvir, Abdullah and Dasgupta, Agnibh and Zhong, Xin},
  title     = {{TIACam}: Text-Anchored Invariant Feature Learning with Auto-Augmentation for Camera-Robust Zero-Watermarking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43030--43039},
}
AnthroTAP: Learning Point Tracking with Real-World Motion: Inès Hyeonsu Kim,

Seokju Cho,

Jahyeok Koo,

Junghyun Park,

Jiahui Huang,

Honglak Lee,

Joon-Young Lee,

Seungryong Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, In{\`e}s Hyeonsu and Cho, Seokju and Koo, Jahyeok and Park, Junghyun and Huang, Jiahui and Lee, Honglak and Lee, Joon-Young and Kim, Seungryong},
  title     = {{AnthroTAP}: Learning Point Tracking with Real-World Motion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42516--42526},
}
ViStoryBench: Comprehensive Benchmark Suite for Story Visualization: Cailin Zhuang,

Ailin Huang,

Yaoqi Hu,

Jingwei Wu,

Wei Cheng,

Jiaqi Liao,

Hongyuan Wang,

Xinyao Liao,

Weiwei Cai,

Hengyuan Xu,

Xuanyang Zhang,

Xianfang Zeng,

Zhewei Huang,

Gang Yu,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuang_2026_CVPR,
  author    = {Zhuang, Cailin and Huang, Ailin and Hu, Yaoqi and Wu, Jingwei and Cheng, Wei and Liao, Jiaqi and Wang, Hongyuan and Liao, Xinyao and Cai, Weiwei and Xu, Hengyuan and Zhang, Xuanyang and Zeng, Xianfang and Huang, Zhewei and Yu, Gang and Zhang, Chi},
  title     = {{ViStoryBench}: Comprehensive Benchmark Suite for Story Visualization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9455--9467},
}
MMCP-GEN: A Modality-Extensible Diffusion Language Model for Conditional Protein Sequence Generation: Zeyu An,

Wanyu Lin,

Feng Tan,

Shujun Wang; [pdf] [supp]
[bibtex]
@InProceedings{An_2026_CVPR,
  author    = {An, Zeyu and Lin, Wanyu and Tan, Feng and Wang, Shujun},
  title     = {{MMCP-GEN}: A Modality-Extensible Diffusion Language Model for Conditional Protein Sequence Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15762--15772},
}
Hierarchical Process Reward Models are Symbolic Vision Learners: Shan Zhang,

Aotian Chen,

Kai Zou,

Jindong Gu,

Yuan Xue,

Anton van den Hengel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Shan and Chen, Aotian and Zou, Kai and Gu, Jindong and Xue, Yuan and van den Hengel, Anton},
  title     = {Hierarchical Process Reward Models are Symbolic Vision Learners},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22185--22194},
}
PPM-CLIP: Probabilistic Prompt Modeling for Generalizable AI-Generated Image Detection: Xinyuan Wang,

Yingxin Lai,

Zhiming Luo,

Zhihui Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xinyuan and Lai, Yingxin and Luo, Zhiming and Liu, Zhihui},
  title     = {{PPM-CLIP}: Probabilistic Prompt Modeling for Generalizable {AI}-Generated Image Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21316--21325},
}
Arcadia: Toward a Full-Lifecycle Framework for Embodied Lifelong Learning: Minghe Gao,

Juncheng Li,

Yuze Lin,

Xuqi Liu,

Jiaming Ji,

Xiaoran Pan,

Zihan Xu,

Xian Li,

Mingjie Li,

Wei Ji,

Rong Wei,

Rui Tang,

Qizhou Wang,

Kai Shen,

Jun Xiao,

Qi Wu,

Siliang Tang,

Yueting Zhuang; [pdf] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Minghe and Li, Juncheng and Lin, Yuze and Liu, Xuqi and Ji, Jiaming and Pan, Xiaoran and Xu, Zihan and Li, Xian and Li, Mingjie and Ji, Wei and Wei, Rong and Tang, Rui and Wang, Qizhou and Shen, Kai and Xiao, Jun and Wu, Qi and Tang, Siliang and Zhuang, Yueting},
  title     = {Arcadia: Toward a Full-Lifecycle Framework for Embodied Lifelong Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1031--1040},
}
CodePercept: Code-Grounded Visual STEM Perception for MLLMs: Tongkun Guan,

Zhibo Yang,

Jianqiang Wan,

Mingkun Yang,

Zhentao Guo,

Zijian Hu,

Ruilin Luo,

Ruizhe Chen,

Songtao Jiang,

Peng Wang,

Wei Shen,

Junyang Lin,

Xiaokang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guan_2026_CVPR,
  author    = {Guan, Tongkun and Yang, Zhibo and Wan, Jianqiang and Yang, Mingkun and Guo, Zhentao and Hu, Zijian and Luo, Ruilin and Chen, Ruizhe and Jiang, Songtao and Wang, Peng and Shen, Wei and Lin, Junyang and Yang, Xiaokang},
  title     = {{CodePercept}: Code-Grounded Visual {STEM} Perception for {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33542--33552},
}
Bridging the Perception Gap in Image Super-Resolution Evaluation: Shaolin Su,

Josep M. Rocafort,

Danna Xue,

David Serrano-Lozano,

Lei Sun,

Javier Vazquez-Corral; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2026_CVPR,
  author    = {Su, Shaolin and Rocafort, Josep M. and Xue, Danna and Serrano-Lozano, David and Sun, Lei and Vazquez-Corral, Javier},
  title     = {Bridging the Perception Gap in Image Super-Resolution Evaluation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30532--30542},
}
Collaborative Multi-Mode Pruning for Vision-Language Models: Zimeng Wu,

Yunhong Wang,

Donghao Wang,

Jiaxin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zimeng and Wang, Yunhong and Wang, Donghao and Chen, Jiaxin},
  title     = {Collaborative Multi-Mode Pruning for Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39561--39571},
}
Unified Spherical Frontend: Learning Rotation-Equivariant Representations of Spherical Images from Any Camera: Mukai Yu,

Mosam Dabhi,

Liuyue Xie,

Sebastian Scherer,

László A. Jeni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Mukai and Dabhi, Mosam and Xie, Liuyue and Scherer, Sebastian and Jeni, L{\'a}szl{\'o} A.},
  title     = {Unified Spherical Frontend: Learning Rotation-Equivariant Representations of Spherical Images from Any Camera},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6106--6115},
}
RADAR: VQ-VAE Decoder of VAR is a Good Student for Restoring Against Degradation by Acceleration: Ziyang Wang,

Yue Zhang,

Mingdao Wang,

Yasen Zhang,

Teer Song,

Yu Tian,

Xueming Li; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ziyang and Zhang, Yue and Wang, Mingdao and Zhang, Yasen and Song, Teer and Tian, Yu and Li, Xueming},
  title     = {{RADAR}: {VQ-VAE} Decoder of {VAR} is a Good Student for Restoring Against Degradation by Acceleration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5273--5282},
}
Beyond Weak Supervision: MLLMs-Guided Graded Knowledge Distillation for Unsupervised Camouflaged Object Detection: Huafeng Chen,

Chenguang Zhu,

Yueming Lyu,

Caifeng Shan; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Huafeng and Zhu, Chenguang and Lyu, Yueming and Shan, Caifeng},
  title     = {Beyond Weak Supervision: {MLLMs}-Guided Graded Knowledge Distillation for Unsupervised Camouflaged Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27547--27557},
}
Selective, Regularized, and Calibrated: Harnessing Vision Foundation Models for Cross-Domain Few-Shot Semantic Segmentation: Junyuan Ma,

Xunzhi Xiang,

Wenbin Li,

Qi Fan,

Yang Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Junyuan and Xiang, Xunzhi and Li, Wenbin and Fan, Qi and Gao, Yang},
  title     = {Selective, Regularized, and Calibrated: Harnessing Vision Foundation Models for Cross-Domain Few-Shot Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12385--12395},
}
LDP-Slicing: Local Differential Privacy for Images via Randomized Bit-Plane Slicing: Yuanming Cao,

Chengqi Li,

Wenbo He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Yuanming and Li, Chengqi and He, Wenbo},
  title     = {{LDP-Slicing}: Local Differential Privacy for Images via Randomized Bit-Plane Slicing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {92--101},
}
Annotation-Efficient Coreset Selection for Context-dependent Segmentation: Jin Zhang,

Zhe Cao,

Biwen Yang,

Ruiheng Zhang; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jin and Cao, Zhe and Yang, Biwen and Zhang, Ruiheng},
  title     = {Annotation-Efficient Coreset Selection for Context-dependent Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20410--20420},
}
DuoGen: Towards Autonomous Interleaved Multimodal Generation: Min Shi,

Xiaohui Zeng,

Jiannan Huang,

Yin Cui,

Francesco Ferroni,

Jialuo Li,

Zhaoshuo Li,

Yogesh Balaji,

Haoxiang Wang,

Tsung-Yi Lin,

Xiao Fu,

Yue Zhao,

Chieh-Yun Chen,

Ming-Yu Liu,

Humphrey Shi; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Min and Zeng, Xiaohui and Huang, Jiannan and Cui, Yin and Ferroni, Francesco and Li, Jialuo and Li, Zhaoshuo and Balaji, Yogesh and Wang, Haoxiang and Lin, Tsung-Yi and Fu, Xiao and Zhao, Yue and Chen, Chieh-Yun and Liu, Ming-Yu and Shi, Humphrey},
  title     = {{DuoGen}: Towards Autonomous Interleaved Multimodal Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21901--21911},
}
MIBURI: Towards Expressive Interactive Gesture Synthesis: M. Hamza Mughal,

Rishabh Dabral,

Vera Demberg,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mughal_2026_CVPR,
  author    = {Mughal, M. Hamza and Dabral, Rishabh and Demberg, Vera and Theobalt, Christian},
  title     = {{MIBURI}: Towards Expressive Interactive Gesture Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40031--40041},
}
Feed-forward Gaussian Registration for Head Avatar Creation and Editing: Malte Prinzler,

Paulo Gotardo,

Siyu Tang,

Timo Bolkart; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Prinzler_2026_CVPR,
  author    = {Prinzler, Malte and Gotardo, Paulo and Tang, Siyu and Bolkart, Timo},
  title     = {Feed-forward {Gaussian} Registration for Head Avatar Creation and Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25270--25280},
}
Evidential Deep Partial Label Learning to Quantify Disambiguation Uncertainty: Jinfu Fan,

Jiangnan Li,

Xiaohui Zhong,

Kangrui Ren,

Zhencun Jiang,

Min Gan,

Tianhao Gu,

Linqing Huang; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Jinfu and Li, Jiangnan and Zhong, Xiaohui and Ren, Kangrui and Jiang, Zhencun and Gan, Min and Gu, Tianhao and Huang, Linqing},
  title     = {Evidential Deep Partial Label Learning to Quantify Disambiguation Uncertainty},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24770--24779},
}
REL-SF4PASS: Panoramic Semantic Segmentation with REL Depth Representation and Spherical Fusion: Xuewei Li,

Xinghan Bao,

Zhimin Chen,

Xi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xuewei and Bao, Xinghan and Chen, Zhimin and Li, Xi},
  title     = {{REL-SF4PASS}: Panoramic Semantic Segmentation with {REL} Depth Representation and Spherical Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27676--27685},
}
EEGiT: Teaching Vision Transformers to Understand the EEG signal: Jiahao Zhou,

Chenghao Xu,

Wei Wang,

Erkun Yang,

Cheng Deng; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Jiahao and Xu, Chenghao and Wang, Wei and Yang, Erkun and Deng, Cheng},
  title     = {{EEGiT}: Teaching Vision Transformers to Understand the {EEG} signal},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40438--40447},
}
MERLIN: Building Low-SNR Robust Multimodal LLMs for Electromagnetic Signals: Junyu Shen,

Zhendong She,

Chenghanyu Zhang,

Yuchuang Sun,

Luqing Luo,

Dingwei Tan,

Zonghao Guo,

Bo Guo,

Zehua Han,

Wupeng Xie,

Yaxin Mu,

Peng Zhang,

Peipei Li,

Fengxiang Wang,

Yangang Sun,

Maosong Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Junyu and She, Zhendong and Zhang, Chenghanyu and Sun, Yuchuang and Luo, Luqing and Tan, Dingwei and Guo, Zonghao and Guo, Bo and Han, Zehua and Xie, Wupeng and Mu, Yaxin and Zhang, Peng and Li, Peipei and Wang, Fengxiang and Sun, Yangang and Sun, Maosong},
  title     = {{MERLIN}: Building Low-{SNR} Robust Multimodal {LLMs} for Electromagnetic Signals},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8653--8663},
}
Think Before You Drive: World Model-Inspired Multimodal Grounding: Haicheng Liao,

Huanming Shen,

Bonan Wang,

Yongkang Li,

Yihong Tang,

Chengyue Wang,

Dingyi Zhuang,

Kehua Chen,

Hai Yang,

Chengzhong Xu,

Zhenning Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Haicheng and Shen, Huanming and Wang, Bonan and Li, Yongkang and Tang, Yihong and Wang, Chengyue and Zhuang, Dingyi and Chen, Kehua and Yang, Hai and Xu, Chengzhong and Li, Zhenning},
  title     = {Think Before You Drive: World Model-Inspired Multimodal Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3677--3687},
}
Learning Surgical Robotic Manipulation with 3D Spatial Priors: Yu Sheng,

Lidian Wang,

Xiaomeng Chu,

Jiajun Deng,

Min Cheng,

Yanyong Zhang,

Bei Hua,

Houqiang Li,

Jianmin Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sheng_2026_CVPR,
  author    = {Sheng, Yu and Wang, Lidian and Chu, Xiaomeng and Deng, Jiajun and Cheng, Min and Zhang, Yanyong and Hua, Bei and Li, Houqiang and Ji, Jianmin},
  title     = {Learning Surgical Robotic Manipulation with {3D} Spatial Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42441--42451},
}
Beyond the Global Scores: Fine-Grained Token Grounding as a Robust Detector of LVLM Hallucinations: Tuan Dung Nguyen,

Minh Khoi Ho,

Qi Chen,

Yutong Xie,

Cam-Tu Nguyen,

Minh Khoi Nguyen,

Dang Huy Pham Nguyen,

Anton van den Hengel,

Johan Verjans,

Phi Le Nguyen,

Vu Minh Hieu Phan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Tuan Dung and Ho, Minh Khoi and Chen, Qi and Xie, Yutong and Nguyen, Cam-Tu and Nguyen, Minh Khoi and Nguyen, Dang Huy Pham and van den Hengel, Anton and Verjans, Johan and Nguyen, Phi Le and Phan, Vu Minh Hieu},
  title     = {Beyond the Global Scores: Fine-Grained Token Grounding as a Robust Detector of {LVLM} Hallucinations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40235--40244},
}
BabyVLM-V2: Toward Developmentally Grounded Pretraining and Benchmarking of Vision Foundation Models: Shengao Wang,

Wenqi Wang,

Zecheng Wang,

Max Whitton,

Michael Wakeham,

Arjun Chandra,

Joey Huang,

Pengyue Zhu,

Helen Chen,

David Li,

Jeffrey Li,

Shawn Li,

Andrew Zagula,

Amy Zhao,

Andrew Zhu,

Sayaka Nakamura,

Yuki Yamamoto,

Jerry Jun Yokono,

Aaron Mueller,

Bryan A. Plummer,

Kate Saenko,

Venkatesh Saligrama,

Boqing Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shengao and Wang, Wenqi and Wang, Zecheng and Whitton, Max and Wakeham, Michael and Chandra, Arjun and Huang, Joey and Zhu, Pengyue and Chen, Helen and Li, David and Li, Jeffrey and Li, Shawn and Zagula, Andrew and Zhao, Amy and Zhu, Andrew and Nakamura, Sayaka and Yamamoto, Yuki and Yokono, Jerry Jun and Mueller, Aaron and Plummer, Bryan A. and Saenko, Kate and Saligrama, Venkatesh and Gong, Boqing},
  title     = {{BabyVLM-V2}: Toward Developmentally Grounded Pretraining and Benchmarking of Vision Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23696--23708},
}
Align Once to Explain: Feature Alignment for Scalable B-cosification of Foundational Vision Transformers: Raphael Maser,

Siddhartha Gairola,

Sukrut Rao,

Bernt Schiele; [pdf] [supp]
[bibtex]
@InProceedings{Maser_2026_CVPR,
  author    = {Maser, Raphael and Gairola, Siddhartha and Rao, Sukrut and Schiele, Bernt},
  title     = {Align Once to Explain: Feature Alignment for Scalable {B-cosification} of Foundational Vision Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9869--9879},
}
PhysX-Anything: Simulation-Ready Physical 3D Assets from Single Image: Ziang Cao,

Fangzhou Hong,

Zhaoxi Chen,

Liang Pan,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Ziang and Hong, Fangzhou and Chen, Zhaoxi and Pan, Liang and Liu, Ziwei},
  title     = {{PhysX-Anything}: Simulation-Ready Physical {3D} Assets from Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5839--5848},
}
DeDelayed: Deleting Remote Inference Delay via On-Device Correction: Dan Jacobellis,

Mateen Ulhaq,

Fabien Racapé,

Hyomin Choi,

Neeraja J. Yadwadkar; [pdf] [supp]
[bibtex]
@InProceedings{Jacobellis_2026_CVPR,
  author    = {Jacobellis, Dan and Ulhaq, Mateen and Racap{\'e}, Fabien and Choi, Hyomin and Yadwadkar, Neeraja J.},
  title     = {{DeDelayed}: Deleting Remote Inference Delay via On-Device Correction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19339--19348},
}
HoloCine: Holistic Generation of Cinematic Multi-Shot Long Video Narratives: Yihao Meng,

Hao Ouyang,

Yue Yu,

Qiuyu Wang,

Wen Wang,

Ka Leong Cheng,

Hanlin Wang,

Shuailei Ma,

Yixuan Li,

Cheng Chen,

Yanhong Zeng,

Xing Zhu,

Yujun Shen,

Huamin Qu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meng_2026_CVPR,
  author    = {Meng, Yihao and Ouyang, Hao and Yu, Yue and Wang, Qiuyu and Wang, Wen and Cheng, Ka Leong and Wang, Hanlin and Ma, Shuailei and Li, Yixuan and Chen, Cheng and Zeng, Yanhong and Zhu, Xing and Shen, Yujun and Qu, Huamin},
  title     = {{HoloCine}: Holistic Generation of Cinematic Multi-Shot Long Video Narratives},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {461--471},
}
S$^2$-MLLM: Boosting Spatial Reasoning Capability of MLLMs for 3D Visual Grounding with Structural Guidance: Beining Xu,

Siting Zhu,

Zhao Jin,

Junxian Li,

Hesheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Beining and Zhu, Siting and Jin, Zhao and Li, Junxian and Wang, Hesheng},
  title     = {{S$^2$-MLLM}: Boosting Spatial Reasoning Capability of {MLLMs} for {3D} Visual Grounding with Structural Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2557--2569},
}
Region-Adaptive Sampling for Diffusion Transformers: Ziming Liu,

Yifan Yang,

Chengruidong Zhang,

Yiqi Zhang,

Lili Qiu,

Yang You,

Yuqing Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Ziming and Yang, Yifan and Zhang, Chengruidong and Zhang, Yiqi and Qiu, Lili and You, Yang and Yang, Yuqing},
  title     = {Region-Adaptive Sampling for Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2346--2356},
}
Scene Reconstruction as Mapping Priors for 3D Detection: Yang Fu,

Yuliang Zou,

Hao Xiang,

Xin Huang,

Yijing Bai,

Chen Song,

Weijing Shi,

Govind Thattai,

Dragomir Anguelov,

Mingxing Tan,

Yingwei Li; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Yang and Zou, Yuliang and Xiang, Hao and Huang, Xin and Bai, Yijing and Song, Chen and Shi, Weijing and Thattai, Govind and Anguelov, Dragomir and Tan, Mingxing and Li, Yingwei},
  title     = {Scene Reconstruction as Mapping Priors for {3D} Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18734--18744},
}
Spatial-SSRL: Enhancing Spatial Understanding via Self-Supervised Reinforcement Learning: Yuhong Liu,

Beichen Zhang,

Yuhang Zang,

Yuhang Cao,

Long Xing,

Xiaoyi Dong,

Haodong Duan,

Dahua Lin,

Jiaqi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuhong and Zhang, Beichen and Zang, Yuhang and Cao, Yuhang and Xing, Long and Dong, Xiaoyi and Duan, Haodong and Lin, Dahua and Wang, Jiaqi},
  title     = {{Spatial-SSRL}: Enhancing Spatial Understanding via Self-Supervised Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9570--9581},
}
SoliReward: Mitigating Susceptibility to Reward Hacking and Annotation Noise in Video Generation Reward Models: Jiesong Lian,

Ruizhe Zhong,

Zixiang Zhou,

Xiaoyue Mi,

Long Hu,

Yuan Zhou,

Qinglin Lu,

Yixue Hao,

Junchi Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lian_2026_CVPR,
  author    = {Lian, Jiesong and Zhong, Ruizhe and Zhou, Zixiang and Mi, Xiaoyue and Hu, Long and Zhou, Yuan and Lu, Qinglin and Hao, Yixue and Yan, Junchi},
  title     = {{SoliReward}: Mitigating Susceptibility to Reward Hacking and Annotation Noise in Video Generation Reward Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12798--12807},
}
Color When It Counts: Grayscale-Guided Online Triggering for Always-On Streaming Video Sensing: Weitong Cai,

Hang Zhang,

Yukai Huang,

Shitong Sun,

Jiankang Deng,

Songcen Xu,

Jifei Song,

Zhensong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Weitong and Zhang, Hang and Huang, Yukai and Sun, Shitong and Deng, Jiankang and Xu, Songcen and Song, Jifei and Zhang, Zhensong},
  title     = {Color When It Counts: Grayscale-Guided Online Triggering for Always-On Streaming Video Sensing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9784--9793},
}
UniSpector: Towards Universal Open-set Defect Recognition via Spectral-Contrastive Visual Prompting: Geonuk Kim,

Minhoi Kim,

Kangil Lee,

Minsu Kim,

Hyeonseong Jeon,

Jeonghoon Han,

Hyoungjoon Lim,

Junho Yim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Geonuk and Kim, Minhoi and Lee, Kangil and Kim, Minsu and Jeon, Hyeonseong and Han, Jeonghoon and Lim, Hyoungjoon and Yim, Junho},
  title     = {{UniSpector}: Towards Universal Open-set Defect Recognition via Spectral-Contrastive Visual Prompting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6261--6270},
}
Globscope: Toward a Global View of the Loss Landscape: Mashiat Mustaq,

Xavier M. Tricoche; [pdf] [supp]
[bibtex]
@InProceedings{Mustaq_2026_CVPR,
  author    = {Mustaq, Mashiat and Tricoche, Xavier M.},
  title     = {Globscope: Toward a Global View of the Loss Landscape},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5263--5272},
}
PaNDaS: Learnable Shape Interpolation Modeling with Localized Control: Thomas Besnier,

Emery Pierson,

Sylvain Arguillere,

Maks Ovsjanikov,

Mohamed Daoudi; [pdf] [supp]
[bibtex]
@InProceedings{Besnier_2026_CVPR,
  author    = {Besnier, Thomas and Pierson, Emery and Arguillere, Sylvain and Ovsjanikov, Maks and Daoudi, Mohamed},
  title     = {{PaNDaS}: Learnable Shape Interpolation Modeling with Localized Control},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13102--13112},
}
Reward Forcing: Efficient Streaming Video Generation with Rewarded Distribution Matching Distillation: Yunhong Lu,

Yanhong Zeng,

Haobo Li,

Hao Ouyang,

Qiuyu Wang,

Ka Leong Cheng,

Jiapeng Zhu,

Hengyuan Cao,

Zhipeng Zhang,

Xing Zhu,

Yujun Shen,

Min Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Yunhong and Zeng, Yanhong and Li, Haobo and Ouyang, Hao and Wang, Qiuyu and Cheng, Ka Leong and Zhu, Jiapeng and Cao, Hengyuan and Zhang, Zhipeng and Zhu, Xing and Shen, Yujun and Zhang, Min},
  title     = {Reward Forcing: Efficient Streaming Video Generation with Rewarded Distribution Matching Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34385--34397},
}
AlignPose: Generalizable 6D Pose Estimation via Multi-view Feature-metric Alignment: Anna Šárová Mikeštíková,

Médéric Fourmy,

Martin Cifka,

Josef Sivic,

Vladimir Petrik; [pdf] [supp]
[bibtex]
@InProceedings{Mikestikova_2026_CVPR,
  author    = {Mike{\v{s}}t{\'\i}kov{\'a}, Anna {\v{S}}{\'a}rov{\'a} and Fourmy, M{\'e}d{\'e}ric and Cifka, Martin and Sivic, Josef and Petrik, Vladimir},
  title     = {{AlignPose}: Generalizable {6D} Pose Estimation via Multi-view Feature-metric Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14626--14636},
}
Den-TP: A Density-Balanced Data Curation and Evaluation Framework for Trajectory Prediction: Ruining Yang,

Yi Xu,

Yun Fu,

Lili Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Ruining and Xu, Yi and Fu, Yun and Su, Lili},
  title     = {{Den-TP}: A Density-Balanced Data Curation and Evaluation Framework for Trajectory Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10632--10641},
}
Towards Streaming Referring Video Segmentation via Large Language Model: Wenkang Zhang,

Kaicheng Yang,

Xiang An,

Qiang Li,

Ziyong Feng,

Wankou Yang,

Jiankang Deng; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Wenkang and Yang, Kaicheng and An, Xiang and Li, Qiang and Feng, Ziyong and Yang, Wankou and Deng, Jiankang},
  title     = {Towards Streaming Referring Video Segmentation via Large Language Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24598--24607},
}
Graph-to-Frame RAG: Visual-Space Knowledge Fusion for Training-Free and Auditable Video Reasoning: Songyuan Yang,

Weijiang Yu,

Ziyu Liu,

Guijian Tang,

Wenjing Yang,

Huibin Tan,

Nong Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Songyuan and Yu, Weijiang and Liu, Ziyu and Tang, Guijian and Yang, Wenjing and Tan, Huibin and Xiao, Nong},
  title     = {Graph-to-Frame {RAG}: Visual-Space Knowledge Fusion for Training-Free and Auditable Video Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33805--33815},
}
VCP-Attack: Visual-Contrastive Projection for Transferable Black-Box Targeted Attacks on Large Vision-Language Models: Jiawei Zhao,

Minjie Du,

Zihan Qin,

Zhuoran Wang,

Lizhe Xie,

Yining Hu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Jiawei and Du, Minjie and Qin, Zihan and Wang, Zhuoran and Xie, Lizhe and Hu, Yining},
  title     = {{VCP-Attack}: Visual-Contrastive Projection for Transferable Black-Box Targeted Attacks on Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30110--30119},
}
iSHIFT: Lightweight Slow-Fast GUI Agent with Adaptive Perception: Sarthak Mehrotra,

Sairam VC Rebbapragada,

Mani Bonthu,

Vineeth N. Balasubramanian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mehrotra_2026_CVPR,
  author    = {Mehrotra, Sarthak and Rebbapragada, Sairam VC and Bonthu, Mani and Balasubramanian, Vineeth N.},
  title     = {{iSHIFT}: Lightweight Slow-Fast {GUI} Agent with Adaptive Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6229--6238},
}
Guiding Diffusion Models with Fine-Grained Conditions and Semantics-Preserving Sampling for One-Shot Federated Learning: Xiaojun Deng,

Tianchi Liao,

Zhiyuan Liu,

Chuan Chen,

Zibin Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Xiaojun and Liao, Tianchi and Liu, Zhiyuan and Chen, Chuan and Zheng, Zibin},
  title     = {Guiding Diffusion Models with Fine-Grained Conditions and Semantics-Preserving Sampling for One-Shot Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31779--31789},
}
MemFlow: A Lightweight Forward Memorizing Framework for Quick Domain Adaptive Feature Mapping: Jianming Lv,

Chengjun Wang,

Depin Liang,

Qianli Ma,

Wei Chen,

Xueqi Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2026_CVPR,
  author    = {Lv, Jianming and Wang, Chengjun and Liang, Depin and Ma, Qianli and Chen, Wei and Cheng, Xueqi},
  title     = {{MemFlow}: A Lightweight Forward Memorizing Framework for Quick Domain Adaptive Feature Mapping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36968--36977},
}
NTK-Guided Implicit Neural Teaching: Chen Zhang,

Wei Zuo,

Bingyang Cheng,

Yikun Wang,

Wei-Bin Kou,

Yik-Chung Wu,

Ngai Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chen and Zuo, Wei and Cheng, Bingyang and Wang, Yikun and Kou, Wei-Bin and Wu, Yik-Chung and Wong, Ngai},
  title     = {{NTK}-Guided Implicit Neural Teaching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17248--17258},
}
See What We Cannot See: A Geo-guided Reasoning Benchmark for Object Counting under Adverse Earth Observation Conditions: Jiayi Wang,

Zhihong Tan,

Hongchen Wei,

Daiqin Yang,

Zhenzhong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jiayi and Tan, Zhihong and Wei, Hongchen and Yang, Daiqin and Chen, Zhenzhong},
  title     = {See What We Cannot See: A Geo-guided Reasoning Benchmark for Object Counting under Adverse {Earth} Observation Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42191--42201},
}
AdaRadar: Rate Adaptive Spectral Compression for Radar-based Perception: Jinho Park,

Se Young Chun,

Mingoo Seok; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Jinho and Chun, Se Young and Seok, Mingoo},
  title     = {{AdaRadar}: Rate Adaptive Spectral Compression for Radar-based Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19349--19359},
}
Lighting-grounded Video Generation with Renderer-based Agent Reasoning: Ziqi Cai,

Taoyu Yang,

Zheng Chang,

Si Li,

Han Jiang,

Shuchen Weng,

Boxin Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Ziqi and Yang, Taoyu and Chang, Zheng and Li, Si and Jiang, Han and Weng, Shuchen and Shi, Boxin},
  title     = {Lighting-grounded Video Generation with Renderer-based Agent Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20055--20065}
}
RefAV: Towards Planning-Centric Scenario Mining: Cainan Davidson,

Deva Ramanan,

Neehar Peri; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Davidson_2026_CVPR,
  author    = {Davidson, Cainan and Ramanan, Deva and Peri, Neehar},
  title     = {{RefAV}: Towards Planning-Centric Scenario Mining},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21537--21548}
}
GeoCoT: Towards Reliable Remote Sensing Reasoning with Manifold Perspective: Daixun Li,

Zirui Li,

Sibo He,

Jiayun Tian,

Mingxiang Cao,

Weiying Xie,

Yunke Wang,

Xin Zhang,

Yusi Zhang,

Yunsong Li,

Chang Xu,

Leyuan Fang; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Daixun and Li, Zirui and He, Sibo and Tian, Jiayun and Cao, Mingxiang and Xie, Weiying and Wang, Yunke and Zhang, Xin and Zhang, Yusi and Li, Yunsong and Xu, Chang and Fang, Leyuan},
  title     = {{GeoCoT}: Towards Reliable Remote Sensing Reasoning with Manifold Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20575--20585}
}
WildPose: A Unified Framework for Robust Pose Estimation in the Wild: Jianhao Zheng,

Liyuan Zhu,

Zihan Zhu,

Iro Armeni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Jianhao and Zhu, Liyuan and Zhu, Zihan and Armeni, Iro},
  title     = {{WildPose}: A Unified Framework for Robust Pose Estimation in the Wild},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28903--28913}
}
IMS3: Breaking Distributional Aggregation in Diffusion-Based Dataset Distillation: Chenru Wang,

Yunyi Chen,

Zijun Yang,

Joey Tianyi Zhou,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chenru and Chen, Yunyi and Yang, Zijun and Zhou, Joey Tianyi and Zhang, Chi},
  title     = {{IMS3}: Breaking Distributional Aggregation in Diffusion-Based Dataset Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26667--26677}
}
PhenoYieldNet: Learning Crop-Aware Phenological Responses for Multi-Crop Yield Prediction: Yu Luo,

Xiaogang Zhu,

Shan Zeng,

Wei Xiang,

Thomas Francis Bishop,

Zhiyong Wang,

Kun Hu; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Yu and Zhu, Xiaogang and Zeng, Shan and Xiang, Wei and Bishop, Thomas Francis and Wang, Zhiyong and Hu, Kun},
  title     = {{PhenoYieldNet}: Learning Crop-Aware Phenological Responses for Multi-Crop Yield Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15870--15879}
}
PG-VTON: Single-Pass Training-Free Virtual Try-On via Patch-Guided Reference Alignment: Guohao Zhao,

Yuxin Peng; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Guohao and Peng, Yuxin},
  title     = {{PG-VTON}: Single-Pass Training-Free Virtual Try-On via Patch-Guided Reference Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7859--7868}
}
ParTY: Part-Guidance for Expressive Text-to-Motion Synthesis: KunHo Heo,

SuYeon Kim,

Yonghyun Gwon,

Youngbin Kim,

MyeongAh Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Heo_2026_CVPR,
  author    = {Heo, KunHo and Kim, SuYeon and Gwon, Yonghyun and Kim, Youngbin and Cho, MyeongAh},
  title     = {{ParTY}: Part-Guidance for Expressive Text-to-Motion Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23549--23558}
}
BOP-ASK: Object-Interaction Reasoning for Vision-Language Models: Vineet Bhat,

Sungsu Kim,

Valts Blukis,

Greg Heinrich,

Prashanth Krishnamurthy,

Ramesh Karri,

Stan Birchfield,

Farshad Khorrami,

Jonathan Tremblay; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bhat_2026_CVPR,
  author    = {Bhat, Vineet and Kim, Sungsu and Blukis, Valts and Heinrich, Greg and Krishnamurthy, Prashanth and Karri, Ramesh and Birchfield, Stan and Khorrami, Farshad and Tremblay, Jonathan},
  title     = {{BOP-ASK}: Object-Interaction Reasoning for Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16746--16757}
}
Label-Free Cross-Task LoRA Merging with Null-Space Compression: Wonyoung Lee,

Wooseong Jeong,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Wonyoung and Jeong, Wooseong and Yoon, Kuk-Jin},
  title     = {Label-Free Cross-Task {LoRA} Merging with Null-Space Compression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {847--859}
}
Decoupled Residual Denoising Diffusion Models for Unified and Data Efficient Image-to-Image Translation: Ziyue Lin,

Jiahe Hou,

Hongyu Xia,

Xinrui Xie,

Feifei Wang,

Yuyin Zhou,

Wei Wang,

Jiawei Liu,

Liangqiong Qu; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Ziyue and Hou, Jiahe and Xia, Hongyu and Xie, Xinrui and Wang, Feifei and Zhou, Yuyin and Wang, Wei and Liu, Jiawei and Qu, Liangqiong},
  title     = {Decoupled Residual Denoising Diffusion Models for Unified and Data Efficient Image-to-Image Translation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35967--35977}
}
2ndMatch: Finetuning Pruned Diffusion Models via Second-Order Jacobian Matching: Caleb Zheng,

Eli Shlizerman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Caleb and Shlizerman, Eli},
  title     = {{2ndMatch}: Finetuning Pruned Diffusion Models via Second-Order {Jacobian} Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43385--43395}
}
CoMo: Learning Continuous Latent Motion from Internet Videos for Scalable Robot Learning: Jiange Yang,

Yansong Shi,

Haoyi Zhu,

Mingyu Liu,

Kaijing Ma,

Yating Wang,

Gangshan Wu,

Tong He,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Jiange and Shi, Yansong and Zhu, Haoyi and Liu, Mingyu and Ma, Kaijing and Wang, Yating and Wu, Gangshan and He, Tong and Wang, Limin},
  title     = {{CoMo}: Learning Continuous Latent Motion from {Internet} Videos for Scalable Robot Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42352--42363}
}
An Optimal Transport-driven Approach for Cultivating Latent Space in Online Incremental Learning: Quyen Tran,

Hai Nguyen,

Quan Dao,

Hoang Phan,

Linh Van,

Khoat Than,

Dinh Phung,

Dimitris Metaxas,

Trung Le; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2026_CVPR,
  author    = {Tran, Quyen and Nguyen, Hai and Dao, Quan and Phan, Hoang and Van, Linh and Than, Khoat and Phung, Dinh and Metaxas, Dimitris and Le, Trung},
  title     = {An Optimal Transport-driven Approach for Cultivating Latent Space in Online Incremental Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10851--10862}
}
Latent Diffusion Inversion Requires Understanding the Latent Space: Mingxing Rao,

Bowen Qu,

Daniel Moyer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rao_2026_CVPR,
  author    = {Rao, Mingxing and Qu, Bowen and Moyer, Daniel},
  title     = {Latent Diffusion Inversion Requires Understanding the Latent Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34971--34980}
}
VisionDirector: Vision-Language Guided Closed-Loop Refinement for Generative Image Synthesis: Meng Chu,

Senqiao Yang,

Haoxuan Che,

Suiyun Zhang,

Xichen Zhang,

Shaozuo Yu,

Haokun Gui,

Zhefan Rao,

Dandan Tu,

Rui Liu,

Jiaya Jia; [pdf] [arXiv]
[bibtex]
@InProceedings{Chu_2026_CVPR,
  author    = {Chu, Meng and Yang, Senqiao and Che, Haoxuan and Zhang, Suiyun and Zhang, Xichen and Yu, Shaozuo and Gui, Haokun and Rao, Zhefan and Tu, Dandan and Liu, Rui and Jia, Jiaya},
  title     = {{VisionDirector}: Vision-Language Guided Closed-Loop Refinement for Generative Image Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9203--9212}
}
TIGER: A Unified Framework for Time, Images and Geo-location Retrieval: David G. Shatwell,

Sirnam Swetha,

Mubarak Shah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shatwell_2026_CVPR,
  author    = {Shatwell, David G. and Swetha, Sirnam and Shah, Mubarak},
  title     = {{TIGER}: A Unified Framework for Time, Images and Geo-location Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23955--23965}
}
Large-scale Codec Avatars: The Unreasonable Effectiveness of Large-scale Avatar Pretraining: Junxuan Li,

Rawal Khirodkar,

Egor Zakharov,

Jihyun Lee,

Zhaoen Su,

Yuan Dong,

Julieta Martinez,

Kai Li,

Qingyang Tan,

Takaaki Shiratori,

Matthew Hu,

Peihong Guo,

Xuhua Huang,

Zhongshi Jiang,

Lingchen Yang,

Ariyan Zarei,

Marco Pesavento,

Yichen Xu,

Chengan He,

He Wen,

Giljoo Nam,

Teng Deng,

Wyatt Borsos,

Anjali Thakrar,

Jean-Charles Bazin,

Rinat Abdrashitov,

Carsten Stoll,

Ginés Hidalgo,

James Booth,

Lucy Wang,

Xiaowen Ma,

Yu Rong,

Sairanjith Thalanki,

Chen Cao,

Christian Häne,

Abhishek Kar,

Sofien Bouaziz,

Jason Saragih,

Yaser Sheikh,

Shunsuke Saito; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Junxuan and Khirodkar, Rawal and Zakharov, Egor and Lee, Jihyun and Su, Zhaoen and Dong, Yuan and Martinez, Julieta and Li, Kai and Tan, Qingyang and Shiratori, Takaaki and Hu, Matthew and Guo, Peihong and Huang, Xuhua and Jiang, Zhongshi and Yang, Lingchen and Zarei, Ariyan and Pesavento, Marco and Xu, Yichen and He, Chengan and Wen, He and Nam, Giljoo and Deng, Teng and Borsos, Wyatt and Thakrar, Anjali and Bazin, Jean-Charles and Abdrashitov, Rinat and Stoll, Carsten and Hidalgo, Gin{\'e}s and Booth, James and Wang, Lucy and Ma, Xiaowen and Rong, Yu and Thalanki, Sairanjith and Cao, Chen and H{\"a}ne, Christian and Kar, Abhishek and Bouaziz, Sofien and Saragih, Jason and Sheikh, Yaser and Saito, Shunsuke},
  title     = {Large-scale Codec Avatars: The Unreasonable Effectiveness of Large-scale Avatar Pretraining},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18204--18215}
}
Grounded Latents for Entity-Centric 4D Scene Generation: Jinhyung Park,

Navyata Sanghvi,

Erica Weng,

Shawn Hunt,

Shinya Tanaka,

Hironobu Fujiyoshi,

Kris Kitani; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Jinhyung and Sanghvi, Navyata and Weng, Erica and Hunt, Shawn and Tanaka, Shinya and Fujiyoshi, Hironobu and Kitani, Kris},
  title     = {Grounded Latents for Entity-Centric {4D} Scene Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21420--21430}
}
PosterOmni: Generalized Artistic Poster Creation via Task Distillation and Unified Reward Feedback: Sixiang Chen,

Jianyu Lai,

Jialin Gao,

Hengyu Shi,

Zhongying Liu,

Tian Ye,

Junfeng Luo,

Xiaoming Wei,

Lei Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Sixiang and Lai, Jianyu and Gao, Jialin and Shi, Hengyu and Liu, Zhongying and Ye, Tian and Luo, Junfeng and Wei, Xiaoming and Zhu, Lei},
  title     = {{PosterOmni}: Generalized Artistic Poster Creation via Task Distillation and Unified Reward Feedback},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5978--5987}
}
Seeing as Experts Do: A Knowledge-Augmented Agent for Open-Set Fine-Grained Visual Understanding: Junhan Chen,

Zilu Zhou,

Yujun Tong,

Dongliang Chang,

Yitao Luo,

Zhanyu Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Junhan and Zhou, Zilu and Tong, Yujun and Chang, Dongliang and Luo, Yitao and Ma, Zhanyu},
  title     = {Seeing as Experts Do: A Knowledge-Augmented Agent for Open-Set Fine-Grained Visual Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41446--41455}
}
DSERT-RoLL: Robust Multi-Modal Perception for Diverse Driving Conditions with Stereo Event-RGB-Thermal Cameras, 4D Radar, and Dual-LiDAR: Hoonhee Cho,

Jae-Young Kang,

Yuhwan Jeong,

Yunseo Yang,

Wonyoung Lee,

Youngho Kim,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cho_2026_CVPR,
  author    = {Cho, Hoonhee and Kang, Jae-Young and Jeong, Yuhwan and Yang, Yunseo and Lee, Wonyoung and Kim, Youngho and Yoon, Kuk-Jin},
  title     = {{DSERT-RoLL}: Robust Multi-Modal Perception for Diverse Driving Conditions with Stereo Event-{RGB}-Thermal Cameras, {4D} Radar, and Dual-{LiDAR}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33022--33035}
}
Multimodal Learning on Low-Quality Data with Conformal Predictive Self-Calibration: Xun Jiang,

Yufan Gu,

Disen Hu,

Yuqing Hou,

Yazhou Yao,

Fumin Shen,

Heng Tao Shen,

Xing Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Xun and Gu, Yufan and Hu, Disen and Hou, Yuqing and Yao, Yazhou and Shen, Fumin and Shen, Heng Tao and Xu, Xing},
  title     = {Multimodal Learning on Low-Quality Data with Conformal Predictive Self-Calibration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23041--23050}
}
BridgeEQA: Virtual Embodied Agents for Real Bridge Inspections: Subin Varghese,

Joshua Gao,

Asad Ur Rahman,

Vedhus Hoskere; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Varghese_2026_CVPR,
  author    = {Varghese, Subin and Gao, Joshua and Rahman, Asad Ur and Hoskere, Vedhus},
  title     = {{BridgeEQA}: Virtual Embodied Agents for Real Bridge Inspections},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8163--8173}
}
A Unified Perspective on Adversarial Membership Manipulation in Vision Models: Ruize Gao,

Kaiwen Zhou,

Yongqiang Chen,

Feng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Ruize and Zhou, Kaiwen and Chen, Yongqiang and Liu, Feng},
  title     = {A Unified Perspective on Adversarial Membership Manipulation in Vision Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1554--1564}
}
Seeing Through Touch: Tactile-Driven Visual Localization of Material Regions: Seongyu Kim,

Seungwoo Lee,

Hyeonggon Ryu,

Joon Son Chung,

Arda Senocak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Seongyu and Lee, Seungwoo and Ryu, Hyeonggon and Chung, Joon Son and Senocak, Arda},
  title     = {Seeing Through Touch: Tactile-Driven Visual Localization of Material Regions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8717--8726}
}
Seeing Motion Through Polarity for Event-based Action Recognition: Meiqi Cao,

Jiachao Zhang,

Xin Jiang,

Rui Yan,

Yazhou Yao,

Zechao Li,

Xiangbo Shu; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Meiqi and Zhang, Jiachao and Jiang, Xin and Yan, Rui and Yao, Yazhou and Li, Zechao and Shu, Xiangbo},
  title     = {Seeing Motion Through Polarity for Event-based Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37075--37085}
}
DynamicsBoost: Dynamic Plausible Video Generation via Annotation-Free Continuation Preference Optimization: Jiaxing Li,

Jiepeng Wang,

Junyao Gao,

Yang Liu,

Eric Li,

Bo An,

Hao-Xiang Guo; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiaxing and Wang, Jiepeng and Gao, Junyao and Liu, Yang and Li, Eric and An, Bo and Guo, Hao-Xiang},
  title     = {{DynamicsBoost}: Dynamic Plausible Video Generation via Annotation-Free Continuation Preference Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20024--20033}
}
Ultra-Fast Neural Video Compression: Jiahao Li,

Wenxuan Xie,

Zhaoyang Jia,

Bin Li,

Zongyu Guo,

Xiaoyi Zhang,

Yan Lu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiahao and Xie, Wenxuan and Jia, Zhaoyang and Li, Bin and Guo, Zongyu and Zhang, Xiaoyi and Lu, Yan},
  title     = {Ultra-Fast Neural Video Compression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41311--41321}
}
Data-Centric Meta-Learning for Robust Few-Shot Generalization: Jongmin Lim,

Soobin Cha,

Jaehun Park,

Inho Oh,

Minho Park,

Kwangsu Kim; [pdf] [supp]
[bibtex]
@InProceedings{Lim_2026_CVPR,
  author    = {Lim, Jongmin and Cha, Soobin and Park, Jaehun and Oh, Inho and Park, Minho and Kim, Kwangsu},
  title     = {Data-Centric Meta-Learning for Robust Few-Shot Generalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5543--5552}
}
MICON-Bench: Benchmarking and Enhancing Multi-Image Context Image Generation in Unified Multimodal Models: Mingrui Wu,

Hang Liu,

Jiayi Ji,

Xiaoshuai Sun,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Mingrui and Liu, Hang and Ji, Jiayi and Sun, Xiaoshuai and Ji, Rongrong},
  title     = {{MICON-Bench}: Benchmarking and Enhancing Multi-Image Context Image Generation in Unified Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8227--8236}
}
Hidden Dangers of Compositional Generation: Diagnosing Semantic Safety Failures in Text-to-Image Models: Haoming Yang,

Ke Ma,

Ligong Zhang,

Xiaojun Jia,

Yingfei Sun,

Qianqian Xu,

Qingming Huang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Haoming and Ma, Ke and Zhang, Ligong and Jia, Xiaojun and Sun, Yingfei and Xu, Qianqian and Huang, Qingming},
  title     = {Hidden Dangers of Compositional Generation: Diagnosing Semantic Safety Failures in Text-to-Image Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15700--15709}
}
iMontage: Unified, Versatile, Highly Dynamic Many-to-many Image Generation: Zhoujie Fu,

Xianfang Zeng,

Jinghong Lan,

Xinyao Liao,

Cheng Chen,

Junyi Chen,

Jiacheng Wei,

Wei Cheng,

Shiyu Liu,

Yunuo Chen,

Gang Yu,

Guosheng Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Zhoujie and Zeng, Xianfang and Lan, Jinghong and Liao, Xinyao and Chen, Cheng and Chen, Junyi and Wei, Jiacheng and Cheng, Wei and Liu, Shiyu and Chen, Yunuo and Yu, Gang and Lin, Guosheng},
  title     = {{iMontage}: Unified, Versatile, Highly Dynamic Many-to-many Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16237--16247}
}
CRAFT: Aligning Diffusion Models with Fine-Tuning Is Easier Than You Think: Zening Sun,

Zhengpeng Xie,

Lichen Bai,

Shitong Shao,

Shuo Yang,

Zeke Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Zening and Xie, Zhengpeng and Bai, Lichen and Shao, Shitong and Yang, Shuo and Xie, Zeke},
  title     = {{CRAFT}: Aligning Diffusion Models with Fine-Tuning Is Easier Than You Think},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35841--35850}
}
Scaling4D: Pushing the Frontier of Video Novel View Synthesis through Large-Scale Monocular Videos: Hongrui Cai,

Junjie Luo,

Zhihong Fu,

Shengnan Zhu,

Jiawei Wen,

Wanquan Feng,

Songtao Zhao,

Qian He; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Hongrui and Luo, Junjie and Fu, Zhihong and Zhu, Shengnan and Wen, Jiawei and Feng, Wanquan and Zhao, Songtao and He, Qian},
  title     = {{Scaling4D}: Pushing the Frontier of Video Novel View Synthesis through Large-Scale Monocular Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11174--11184}
}
BEA-GS: BEyond RAdiance Supervision in 3DGS for Precise Object Extraction: Alessio Mazzucchelli,

Maria Naranjo-Almeida,

Jorge Bustos-Sanchez,

Mariella Dimiccoli,

Francesc Moreno-Noguer,

Jordi Sanchez-Riera,

Adrian Penate-Sanchez; [pdf] [supp]
[bibtex]
@InProceedings{Mazzucchelli_2026_CVPR,
  author    = {Mazzucchelli, Alessio and Naranjo-Almeida, Maria and Bustos-Sanchez, Jorge and Dimiccoli, Mariella and Moreno-Noguer, Francesc and Sanchez-Riera, Jordi and Penate-Sanchez, Adrian},
  title     = {{BEA-GS}: {BEyond} {RAdiance} Supervision in {3DGS} for Precise Object Extraction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41054--41064}
}
M4-SAM: Multi-Modal Mixture-of-Experts with Memory-Augmented SAM for RGB-D Video Salient Object Detection: Jiyuan Liu,

Jia Lin,

Xiaofei Zhou,

Runmin Cong,

Deyang Liu,

Zhi Liu; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jiyuan and Lin, Jia and Zhou, Xiaofei and Cong, Runmin and Liu, Deyang and Liu, Zhi},
  title     = {{M4-SAM}: Multi-Modal Mixture-of-Experts with Memory-Augmented {SAM} for {RGB-D} Video Salient Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24970--24979}
}
Circuit Mechanisms for Spatial Relation Generation in Diffusion Transformers: Binxu Wang,

Jingxuan Fan,

Xu Pan; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Binxu and Fan, Jingxuan and Pan, Xu},
  title     = {Circuit Mechanisms for Spatial Relation Generation in Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23612--23621}
}
FusionAgent: A Multimodal Agent with Dynamic Model Selection for Human Recognition: Jie Zhu,

Xiao Guo,

Yiyang Su,

Anil Jain,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Jie and Guo, Xiao and Su, Yiyang and Jain, Anil and Liu, Xiaoming},
  title     = {{FusionAgent}: A Multimodal Agent with Dynamic Model Selection for Human Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32756--32766}
}
Chain of World: World Model Thinking in Latent Motion: Fuxiang Yang,

Donglin Di,

Lulu Tang,

Xuancheng Zhang,

Lei Fan,

Hao Li,

Wei Chen,

Tonghua Su,

Baorui Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Fuxiang and Di, Donglin and Tang, Lulu and Zhang, Xuancheng and Fan, Lei and Li, Hao and Chen, Wei and Su, Tonghua and Ma, Baorui},
  title     = {Chain of World: World Model Thinking in Latent Motion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6675--6684}
}
Learning Forgery-Aware Lip Representations Without Forgery Priors: Bofan Chen,

Hongyu Zhu,

Yi He,

Sichu Liang,

Shi-Lin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Bofan and Zhu, Hongyu and He, Yi and Liang, Sichu and Wang, Shi-Lin},
  title     = {Learning Forgery-Aware Lip Representations Without Forgery Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42911--42921}
}
MimiCAT: Mimic with Correspondence-Aware Cascade-Transformer for Category-Free 3D Pose Transfer: Zenghao Chai,

Chen Tang,

Yongkang Wong,

Xulei Yang,

Mohan Kankanhalli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chai_2026_CVPR,
  author    = {Chai, Zenghao and Tang, Chen and Wong, Yongkang and Yang, Xulei and Kankanhalli, Mohan},
  title     = {{MimiCAT}: Mimic with Correspondence-Aware Cascade-Transformer for Category-Free {3D} Pose Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13962--13973}
}
2-Shots in the Dark: Low-Light Denoising with Minimal Data Acquisition: Liying Lu,

Raphael Achddou,

Sabine Süsstrunk; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Liying and Achddou, Raphael and S{\"u}sstrunk, Sabine},
  title     = {2-Shots in the Dark: Low-Light Denoising with Minimal Data Acquisition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15496--15505}
}
How to Take a Memorable Picture? Empowering Users with Actionable Feedback: Francesco Laiti,

Davide Talon,

Jacopo Staiano,

Elisa Ricci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Laiti_2026_CVPR,
  author    = {Laiti, Francesco and Talon, Davide and Staiano, Jacopo and Ricci, Elisa},
  title     = {How to Take a Memorable Picture? Empowering Users with Actionable Feedback},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29738--29749}
}
Coded-E2LF: Coded Aperture Light Field Imaging from Events: Tomoya Tsuchida,

Keita Takahashi,

Chihiro Tsutake,

Toshiaki Fujii,

Hajime Nagahara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tsuchida_2026_CVPR,
  author    = {Tsuchida, Tomoya and Takahashi, Keita and Tsutake, Chihiro and Fujii, Toshiaki and Nagahara, Hajime},
  title     = {{Coded-E2LF}: Coded Aperture Light Field Imaging from Events},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19726--19736}
}
General Process Reward Modeling for Robotic Reinforcement Learning: Huajie Tan,

Sixiang Chen,

Yijie Xu,

Zixiao Wang,

Cheng Chi,

Yuheng Ji,

Yaoxu Lyu,

Zhongxia Zhao,

Xiansheng Chen,

Peterson Co,

Shaoxuan Xie,

Guocai Yao,

Pengwei Wang,

Zhongyuan Wang,

Shanghang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Huajie and Chen, Sixiang and Xu, Yijie and Wang, Zixiao and Chi, Cheng and Ji, Yuheng and Lyu, Yaoxu and Zhao, Zhongxia and Chen, Xiansheng and Co, Peterson and Xie, Shaoxuan and Yao, Guocai and Wang, Pengwei and Wang, Zhongyuan and Zhang, Shanghang},
  title     = {General Process Reward Modeling for Robotic Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22412--22422}
}
Inferring Compositional 4D Scenes without Ever Seeing One: Ahmet Berke Gökmen,

Ajad Chhatkuli,

Luc Van Gool,

Danda Pani Paudel; [pdf] [supp]
[bibtex]
@InProceedings{Gokmen_2026_CVPR,
  author    = {G{\"o}kmen, Ahmet Berke and Chhatkuli, Ajad and Van Gool, Luc and Paudel, Danda Pani},
  title     = {Inferring Compositional {4D} Scenes without Ever Seeing One},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {317--329}
}
ZINA: Multimodal Fine-grained Hallucination Detection and Editing: Yuiga Wada,

Kazuki Matsuda,

Komei Sugiura,

Graham Neubig; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wada_2026_CVPR,
  author    = {Wada, Yuiga and Matsuda, Kazuki and Sugiura, Komei and Neubig, Graham},
  title     = {{ZINA}: Multimodal Fine-grained Hallucination Detection and Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32528--32538}
}
Mistake Attribution: Fine-Grained Mistake Understanding in Egocentric Videos: Yayuan Li,

Aadit Jain,

Filippos Bellos,

Jason J. Corso; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yayuan and Jain, Aadit and Bellos, Filippos and Corso, Jason J.},
  title     = {Mistake Attribution: Fine-Grained Mistake Understanding in Egocentric Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23966--23976}
}
Multi-Crit: Benchmarking Multimodal Judges on Pluralistic Criteria-Following: Tianyi Xiong,

Yi Ge,

Ming Li,

Zuolong Zhang,

Pranav Kulkarni,

Kaishen Wang,

Qi He,

Zeying Zhu,

Chenxi Liu,

Ruibo Chen,

Tong Zheng,

Yanshuo Chen,

Xiyao Wang,

Renrui Zhang,

Wenhu Chen,

Heng Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Tianyi and Ge, Yi and Li, Ming and Zhang, Zuolong and Kulkarni, Pranav and Wang, Kaishen and He, Qi and Zhu, Zeying and Liu, Chenxi and Chen, Ruibo and Zheng, Tong and Chen, Yanshuo and Wang, Xiyao and Zhang, Renrui and Chen, Wenhu and Huang, Heng},
  title     = {{Multi-Crit}: Benchmarking Multimodal Judges on Pluralistic Criteria-Following},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8641--8652}
}
InfiniDepth: Arbitrary-Resolution and Fine-Grained Depth Estimation with Neural Implicit Fields: Hao Yu,

Haotong Lin,

Jiawei Wang,

Jiaxin Li,

Yida Wang,

Xueyang Zhang,

Yue Wang,

Xiaowei Zhou,

Ruizhen Hu,

Sida Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Hao and Lin, Haotong and Wang, Jiawei and Li, Jiaxin and Wang, Yida and Zhang, Xueyang and Wang, Yue and Zhou, Xiaowei and Hu, Ruizhen and Peng, Sida},
  title     = {{InfiniDepth}: Arbitrary-Resolution and Fine-Grained Depth Estimation with Neural Implicit Fields},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26920--26930}
}
OneThinker: All-in-one Reasoning Model for Image and Video: Kaituo Feng,

Manyuan Zhang,

Hongyu Li,

Kaixuan Fan,

Shuang Chen,

Yilei Jiang,

Dian Zheng,

Peiwen Sun,

Yiyuan Zhang,

Haoze Sun,

Yan Feng,

Peng Pei,

Xunliang Cai,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Kaituo and Zhang, Manyuan and Li, Hongyu and Fan, Kaixuan and Chen, Shuang and Jiang, Yilei and Zheng, Dian and Sun, Peiwen and Zhang, Yiyuan and Sun, Haoze and Feng, Yan and Pei, Peng and Cai, Xunliang and Yue, Xiangyu},
  title     = {{OneThinker}: All-in-one Reasoning Model for Image and Video},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5432--5443},
}
ReLaGS: Relational Language Gaussian Splatting: Yaxu Xie,

Abdalla Arafa,

Alireza Javanmardi,

Christen Millerdurai,

Jia Cheng Hu,

Shaoxiang Wang,

Alain Pagani,

Didier Stricker; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Xie_2026_CVPR is reused by another entry in this listing; rename before merging into a single .bib file.
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yaxu and Arafa, Abdalla and Javanmardi, Alireza and Millerdurai, Christen and Hu, Jia Cheng and Wang, Shaoxiang and Pagani, Alain and Stricker, Didier},
  title     = {{ReLaGS}: Relational Language {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23826--23836},
}
On Token's Dilemma: Dynamic MoE with Drift-Aware Token Assignment for Continual Learning of Large Vision Language Models: Chongyang Zhao,

Mingsong Li,

Haodong Lu,

Dong Gong; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Zhao_2026_CVPR is reused by another entry in this listing; rename before merging into a single .bib file.
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Chongyang and Li, Mingsong and Lu, Haodong and Gong, Dong},
  title     = {On Token's Dilemma: Dynamic {MoE} with Drift-Aware Token Assignment for Continual Learning of Large Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3941--3952},
}
UARE: A Unified Vision-Language Model for Image Quality Assessment, Restoration, and Enhancement: Weiqi Li,

Xuanyu Zhang,

Bin Chen,

Jingfen Xie,

Yan Wang,

Kexin Zhang,

Junlin Li,

Li Zhang,

Jian Zhang,

Shijie Zhao; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2026_CVPR is reused by other entries in this listing; rename before merging into a single .bib file.
@InProceedings{Li_2026_CVPR,
  author    = {Li, Weiqi and Zhang, Xuanyu and Chen, Bin and Xie, Jingfen and Wang, Yan and Zhang, Kexin and Li, Junlin and Zhang, Li and Zhang, Jian and Zhao, Shijie},
  title     = {{UARE}: A Unified Vision-Language Model for Image Quality Assessment, Restoration, and Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22689--22702},
}
VES-RFT: Rewarding Visual Evidence Sensitivity to Mitigate Hallucinations in Large Vision-Language Models: Xuehe Hou,

Wenshuo Li,

Yali Li,

Han Shu,

Yuan Wang,

Xinghao Chen,

Shengjin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Hou_2026_CVPR,
  author    = {Hou, Xuehe and Li, Wenshuo and Li, Yali and Shu, Han and Wang, Yuan and Chen, Xinghao and Wang, Shengjin},
  title     = {{VES-RFT}: Rewarding Visual Evidence Sensitivity to Mitigate Hallucinations in Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4168--4177},
}
UVU: Improving Multimodal Understanding via Vision-Language Unified Autoregressive Paradigm: Zhehan Kan,

Xinghua Jiang,

Yanlin Liu,

Xiaochen Yang,

Zhixiang Wei,

Shifeng Liu,

Yubo Zhu,

Qingmin Liao,

Wenming Yang,

Xin Li,

Yinsong Liu,

Deqiang Jiang,

Xing Sun; [pdf] [supp]
[bibtex]
@InProceedings{Kan_2026_CVPR,
  author    = {Kan, Zhehan and Jiang, Xinghua and Liu, Yanlin and Yang, Xiaochen and Wei, Zhixiang and Liu, Shifeng and Zhu, Yubo and Liao, Qingmin and Yang, Wenming and Li, Xin and Liu, Yinsong and Jiang, Deqiang and Sun, Xing},
  title     = {{UVU}: Improving Multimodal Understanding via Vision-Language Unified Autoregressive Paradigm},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26230--26239},
}
Bidirectional Normalizing Flow: From Data to Noise and Back: Yiyang Lu,

Qiao Sun,

Xianbang Wang,

Zhicheng Jiang,

Hanhong Zhao,

Kaiming He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Yiyang and Sun, Qiao and Wang, Xianbang and Jiang, Zhicheng and Zhao, Hanhong and He, Kaiming},
  title     = {Bidirectional Normalizing Flow: From Data to Noise and Back},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2069--2078},
}
WPT: World-to-Policy Transfer via Online World Model Distillation: Guangfeng Jiang,

Yueru Luo,

Jun Liu,

Yi Huang,

Yiyao Zhu,

Zhan Qu,

Dave Zhenyu Chen,

Bingbing Liu,

Xu Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Guangfeng and Luo, Yueru and Liu, Jun and Huang, Yi and Zhu, Yiyao and Qu, Zhan and Chen, Dave Zhenyu and Liu, Bingbing and Yan, Xu},
  title     = {{WPT}: World-to-Policy Transfer via Online World Model Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17842--17852},
}
MaskDiME: Adaptive Masked Diffusion for Precise and Efficient Visual Counterfactual Explanations: Changlu Guo,

Anders Nymark Christensen,

Anders Bjorholm Dahl,

Morten Rieger Hannemose; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Guo_2026_CVPR is reused by another entry in this listing; rename before merging into a single .bib file.
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Changlu and Christensen, Anders Nymark and Dahl, Anders Bjorholm and Hannemose, Morten Rieger},
  title     = {{MaskDiME}: Adaptive Masked Diffusion for Precise and Efficient Visual Counterfactual Explanations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24118--24128},
}
EmoDiffTalk: Emotion-aware Diffusion for Editable 3D Gaussian Talking Head: Chang Liu,

Tianjiao Jing,

Chengcheng Ma,

Xuanqi Zhou,

Zhengxuan Lian,

Qin Jin,

Hongliang Yuan,

Shi-Sheng Huang; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Liu_2026_CVPR is reused by other entries in this listing; rename before merging into a single .bib file.
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Chang and Jing, Tianjiao and Ma, Chengcheng and Zhou, Xuanqi and Lian, Zhengxuan and Jin, Qin and Yuan, Hongliang and Huang, Shi-Sheng},
  title     = {{EmoDiffTalk}: Emotion-aware Diffusion for Editable {3D} {Gaussian} Talking Head},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18063--18073},
}
4C4D: 4 Camera 4D Gaussian Splatting: Junsheng Zhou,

Zhifan Yang,

Liang Han,

Wenyuan Zhang,

Kanle Shi,

Shenkun Xu,

Yu-Shen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Junsheng and Yang, Zhifan and Han, Liang and Zhang, Wenyuan and Shi, Kanle and Xu, Shenkun and Liu, Yu-Shen},
  title     = {{4C4D}: 4 Camera {4D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11829--11839},
}
Exploring Visual Pretraining for Learning Language Intelligence: Zhonghan Zhao,

Yiming Zhang,

Wenwei Zhang,

Haiteng Zhao,

Xingguang Wei,

Zhangwei Gao,

Kuikun Liu,

Yuzhe Gu,

Size Wu,

Haian Huang,

Jianfei Gao,

Haijun Lv,

Demin Song,

Yunhua Zhou,

Qipeng Guo,

Gaoang Wang,

Kai Chen; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Zhao_2026_CVPR is reused by another entry in this listing; rename before merging into a single .bib file.
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Zhonghan and Zhang, Yiming and Zhang, Wenwei and Zhao, Haiteng and Wei, Xingguang and Gao, Zhangwei and Liu, Kuikun and Gu, Yuzhe and Wu, Size and Huang, Haian and Gao, Jianfei and Lv, Haijun and Song, Demin and Zhou, Yunhua and Guo, Qipeng and Wang, Gaoang and Chen, Kai},
  title     = {Exploring Visual Pretraining for Learning Language Intelligence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31493--31503},
}
CROWn: A Unified Framework for Anti-Aliased Downsampling and Phase-Calibrated Fusion in 3D Medical Segmentation: Xingru Huang,

Shuanghua Ye,

Zhao Huang,

Wenwen Tang,

Huiyu Zhou,

Zhiwen Zheng,

Jin Liu,

Xiaoshuai Zhang; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Huang_2026_CVPR is reused by another entry in this listing; rename before merging into a single .bib file.
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Xingru and Ye, Shuanghua and Huang, Zhao and Tang, Wenwen and Zhou, Huiyu and Zheng, Zhiwen and Liu, Jin and Zhang, Xiaoshuai},
  title     = {{CROWn}: A Unified Framework for Anti-Aliased Downsampling and Phase-Calibrated Fusion in {3D} Medical Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8514--8524},
}
3D-Object Perception Transformer (3PT): Agastya Kalra,

Tim Salzmann,

Guy Stoppi,

Dmitrii Marin,

Rishav Agarwal,

Vage Taamazyan,

Martin Bokeloh,

Stefan Hinterstoisser,

Anton Boykov,

Alberto Dall'Olio,

Pravin Dangol,

Kartik Venkataraman,

Huaijin Chen; [pdf] [supp]
[bibtex]
@InProceedings{Kalra_2026_CVPR,
  author    = {Kalra, Agastya and Salzmann, Tim and Stoppi, Guy and Marin, Dmitrii and Agarwal, Rishav and Taamazyan, Vage and Bokeloh, Martin and Hinterstoisser, Stefan and Boykov, Anton and Dall'Olio, Alberto and Dangol, Pravin and Venkataraman, Kartik and Chen, Huaijin},
  title     = {{3D}-Object Perception Transformer ({3PT})},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25777--25787},
}
InterPrior: Scaling Generative Control for Physics-Based Human-Object Interactions: Sirui Xu,

Samuel Schulter,

Morteza Ziyadi,

Xialin He,

Xiaohan Fei,

Yu-Xiong Wang,

Liang-Yan Gui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Sirui and Schulter, Samuel and Ziyadi, Morteza and He, Xialin and Fei, Xiaohan and Wang, Yu-Xiong and Gui, Liang-Yan},
  title     = {{InterPrior}: Scaling Generative Control for Physics-Based Human-Object Interactions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23516--23527},
}
FastGaMer: Efficient GainMap Learning for Practical Inverse Tone Mapping: Yuanshen Guan,

Ruikang Xu,

Chang Chen,

Yinuo Liao,

Dehua Song,

Fenglong Song,

Zhiwei Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Guan_2026_CVPR,
  author    = {Guan, Yuanshen and Xu, Ruikang and Chen, Chang and Liao, Yinuo and Song, Dehua and Song, Fenglong and Xiong, Zhiwei},
  title     = {{FastGaMer}: Efficient {GainMap} Learning for Practical Inverse Tone Mapping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22703--22712},
}
Chain-of-Frames: Advancing Video Understanding in Multimodal LLMs via Frame-Aware Reasoning: Sara Ghazanfari,

Francesco Croce,

Nicolas Flammarion,

Prashanth Krishnamurthy,

Farshad Khorrami,

Siddharth Garg; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghazanfari_2026_CVPR,
  author    = {Ghazanfari, Sara and Croce, Francesco and Flammarion, Nicolas and Krishnamurthy, Prashanth and Khorrami, Farshad and Garg, Siddharth},
  title     = {{Chain-of-Frames}: Advancing Video Understanding in Multimodal {LLMs} via Frame-Aware Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2746--2755},
}
Linguistic Priors for Visual Decoupling: Towards Symmetric Vision-Brain Alignment: Dongjun Liu,

Weichen Dai,

Jingsheng Qian,

Honggang Liu,

Hangjie Yi,

Wanzeng Kong; [pdf]
[bibtex]
% NOTE(review): citation key Liu_2026_CVPR is reused by other entries in this listing; rename before merging into a single .bib file.
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Dongjun and Dai, Weichen and Qian, Jingsheng and Liu, Honggang and Yi, Hangjie and Kong, Wanzeng},
  title     = {Linguistic Priors for Visual Decoupling: Towards Symmetric Vision-Brain Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7869--7878},
}
Looking Beyond the Window: Global-Local Aligned CLIP for Training-free Open-Vocabulary Semantic Segmentation: ByeongCheol Lee,

Hyun Seok Seong,

Sangeek Hyun,

Gilhan Park,

WonJun Moon,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, ByeongCheol and Seong, Hyun Seok and Hyun, Sangeek and Park, Gilhan and Moon, WonJun and Heo, Jae-Pil},
  title     = {Looking Beyond the Window: Global-Local Aligned {CLIP} for Training-free Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27686--27696},
}
ReBaPL: Repulsive Bayesian Prompt Learning: Yassir Bendou,

Omar Ezzahir,

Eduardo Montesuma,

Gabriel Mahuas,

Victoria Shevchenko,

Mike Gartrell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bendou_2026_CVPR,
  author    = {Bendou, Yassir and Ezzahir, Omar and Montesuma, Eduardo and Mahuas, Gabriel and Shevchenko, Victoria and Gartrell, Mike},
  title     = {{ReBaPL}: Repulsive {Bayesian} Prompt Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39962--39971},
}
What Matters in Practical Learned Image Compression: Kedar Tatwawadi,

Parisa Rahimzadeh,

Zhanghao Sun,

Zhiqi Chen,

Ziyun Yang,

Sanjay Nair,

Divija Hasteer,

Oren Rippel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tatwawadi_2026_CVPR,
  author    = {Tatwawadi, Kedar and Rahimzadeh, Parisa and Sun, Zhanghao and Chen, Zhiqi and Yang, Ziyun and Nair, Sanjay and Hasteer, Divija and Rippel, Oren},
  title     = {What Matters in Practical Learned Image Compression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12095--12105},
}
Model Merging in the Essential Subspace: Longhua Li,

Lei Qi,

Qi Tian,

Xin Geng; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2026_CVPR is reused by other entries in this listing; rename before merging into a single .bib file.
@InProceedings{Li_2026_CVPR,
  author    = {Li, Longhua and Qi, Lei and Tian, Qi and Geng, Xin},
  title     = {Model Merging in the Essential Subspace},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31145--31154},
}
Expert-Teacher-Student Collaborative Learning for Domain Adaptive Object Detection: Yiming Cui,

Liang Li,

Haibing Yin,

Yuhan Gao,

Xichun Sheng,

Chenggang Yan; [pdf] [supp]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Yiming and Li, Liang and Yin, Haibing and Gao, Yuhan and Sheng, Xichun and Yan, Chenggang},
  title     = {Expert-Teacher-Student Collaborative Learning for Domain Adaptive Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25557--25567},
}
DeepAlign: Mitigating Modality Conflict through Modality-Specific Alignment: Shuo Li,

Bingchen Miao,

Wendong Bu,

Juncheng Li,

Hanwang Zhang,

Fei Wu; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Li_2026_CVPR is reused by other entries in this listing; rename before merging into a single .bib file.
@InProceedings{Li_2026_CVPR,
  author    = {Li, Shuo and Miao, Bingchen and Bu, Wendong and Li, Juncheng and Zhang, Hanwang and Wu, Fei},
  title     = {{DeepAlign}: Mitigating Modality Conflict through Modality-Specific Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7847--7858},
}
SG-LoRA: Semantic-guided LoRA Parameters Generation: Miaoge Li,

Yang Chen,

Zhijie Rao,

Can Jiang,

Kang Wei,

Jingcai Guo; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Li_2026_CVPR is reused by other entries in this listing; rename before merging into a single .bib file.
@InProceedings{Li_2026_CVPR,
  author    = {Li, Miaoge and Chen, Yang and Rao, Zhijie and Jiang, Can and Wei, Kang and Guo, Jingcai},
  title     = {{SG-LoRA}: Semantic-guided {LoRA} Parameters Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22206--22216},
}
G$^2$VLM: Geometry Grounded Vision Language Model with Unified 3D Reconstruction and Spatial Reasoning: Wenbo Hu,

Jingli Lin,

Yilin Long,

Yunlong Ran,

Lihan Jiang,

Yifan Wang,

Chenming Zhu,

Runsen Xu,

Tai Wang,

Jiangmiao Pang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Hu_2026_CVPR is reused by another entry in this listing; rename before merging into a single .bib file.
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Wenbo and Lin, Jingli and Long, Yilin and Ran, Yunlong and Jiang, Lihan and Wang, Yifan and Zhu, Chenming and Xu, Runsen and Wang, Tai and Pang, Jiangmiao},
  title     = {{G$^2$VLM}: Geometry Grounded Vision Language Model with Unified {3D} Reconstruction and Spatial Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9535--9546},
}
RecTok: Reconstruction Distillation along Rectified Flow: Qingyu Shi,

Size Wu,

Jinbin Bai,

Kaidong Yu,

Yujing Wang,

Yunhai Tong,

Xiangtai Li,

Xuelong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Qingyu and Wu, Size and Bai, Jinbin and Yu, Kaidong and Wang, Yujing and Tong, Yunhai and Li, Xiangtai and Li, Xuelong},
  title     = {{RecTok}: Reconstruction Distillation along Rectified Flow},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40685--40695},
}
FunFact: Building Probabilistic Functional 3D Scene Graphs via Factor-Graph Reasoning: Zhengyu Fu,

René Zurbrügg,

Kaixian Qu,

Marc Pollefeys,

Marco Hutter,

Hermann Blum,

Zuria Bauer; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Zhengyu and Zurbr{\"u}gg, Ren{\'e} and Qu, Kaixian and Pollefeys, Marc and Hutter, Marco and Blum, Hermann and Bauer, Zuria},
  title     = {{FunFact}: Building Probabilistic Functional {3D} Scene Graphs via Factor-Graph Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23848--23858},
}
OpenDPR: Open-Vocabulary Change Detection via Vision-Centric Diffusion-Guided Prototype Retrieval for Remote Sensing Imagery: Qi Guo,

Jue Wang,

Yinhe Liu,

Yanfei Zhong; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Guo_2026_CVPR is reused by another entry in this listing; rename before merging into a single .bib file.
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Qi and Wang, Jue and Liu, Yinhe and Zhong, Yanfei},
  title     = {{OpenDPR}: Open-Vocabulary Change Detection via Vision-Centric Diffusion-Guided Prototype Retrieval for Remote Sensing Imagery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20399--20409},
}
Virtual Immunohistochemistry Staining with Dual-Aligned Multi-Task Feature Guidance: Shigeng Xie,

Hongming Xu,

Guiyang Jiang,

Tuomo Rossi,

Tommi Kärkkäinen,

Fengyu Cong; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Xie_2026_CVPR is reused by another entry in this listing; rename before merging into a single .bib file.
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Shigeng and Xu, Hongming and Jiang, Guiyang and Rossi, Tuomo and K{\"a}rkk{\"a}inen, Tommi and Cong, Fengyu},
  title     = {Virtual Immunohistochemistry Staining with Dual-Aligned Multi-Task Feature Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35311--35320},
}
NeighborMAE: Exploiting Spatial Dependencies between Neighboring Earth Observation Images in Masked Autoencoders Pretraining: Liang Zeng,

Valerio Marsocci,

Wufan Zhao,

Andrea Nascetti,

Maarten Vergauwen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Liang and Marsocci, Valerio and Zhao, Wufan and Nascetti, Andrea and Vergauwen, Maarten},
  title     = {{NeighborMAE}: Exploiting Spatial Dependencies between Neighboring {Earth} Observation Images in Masked Autoencoders Pretraining},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20597--20607},
}
Fixed Anchors Are Not Enough: Dynamic Retrieval and Persistent Homology for Dataset Distillation: Muquan Li,

Hang Gou,

Yingyi Ma,

Rongzheng Wang,

Ke Qin,

Tao He; [pdf] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2026_CVPR is reused by other entries in this listing; rename before merging into a single .bib file.
@InProceedings{Li_2026_CVPR,
  author    = {Li, Muquan and Gou, Hang and Ma, Yingyi and Wang, Rongzheng and Qin, Ke and He, Tao},
  title     = {Fixed Anchors Are Not Enough: Dynamic Retrieval and Persistent Homology for Dataset Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33963--33972},
}
A Cross-view Fusion Framework for Robust 6-DoF Grasp Pose Estimation: Kangjian Zhu,

Haobo Jiang,

Jianjun Qian,

Jin Xie; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Kangjian and Jiang, Haobo and Qian, Jianjun and Xie, Jin},
  title     = {A Cross-view Fusion Framework for Robust {6-DoF} Grasp Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28052--28061},
}
ViT$^3$: Unlocking Test-Time Training in Vision: Dongchen Han,

Yining Li,

Tianyu Li,

Zixuan Cao,

Ziming Wang,

Jun Song,

Yu Cheng,

Bo Zheng,

Gao Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Dongchen and Li, Yining and Li, Tianyu and Cao, Zixuan and Wang, Ziming and Song, Jun and Cheng, Yu and Zheng, Bo and Huang, Gao},
  title     = {{ViT$^3$}: Unlocking Test-Time Training in Vision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {51--61},
}
MAD: Motion Appearance Decoupling for efficient Driving World Models: Ahmad Rahimi,

Valentin Gerard,

Eloi Zablocki,

Matthieu Cord,

Alexandre Alahi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rahimi_2026_CVPR,
  author    = {Rahimi, Ahmad and Gerard, Valentin and Zablocki, Eloi and Cord, Matthieu and Alahi, Alexandre},
  title     = {{MAD}: Motion Appearance Decoupling for efficient Driving World Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18364--18374},
}
Learning to Generate via Understanding: Understanding-Driven Intrinsic Rewarding for Unified Multimodal Models: Jiadong Pan,

Liang Li,

Yuxin Peng,

Yu-Ming Tang,

Shuohuan Wang,

Yu Sun,

Hua Wu,

Qingming Huang,

Haifeng Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Jiadong and Li, Liang and Peng, Yuxin and Tang, Yu-Ming and Wang, Shuohuan and Sun, Yu and Wu, Hua and Huang, Qingming and Wang, Haifeng},
  title     = {Learning to Generate via Understanding: Understanding-Driven Intrinsic Rewarding for Unified Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22174--22184},
}
CASPA: Graph-Structured Concept Anchors for Modality-Agnostic Adaptation in Vision-Language Models: Abhiroop Chatterjee,

Susmita Ghosh,

Ashish Ghosh,

Emmett Ientilucci; [pdf] [supp]
[bibtex]
@InProceedings{Chatterjee_2026_CVPR,
  author    = {Chatterjee, Abhiroop and Ghosh, Susmita and Ghosh, Ashish and Ientilucci, Emmett},
  title     = {{CASPA}: Graph-Structured Concept Anchors for Modality-Agnostic Adaptation in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31566--31576},
}
4D-RGPT: Toward Region-level 4D Understanding via Perceptual Distillation: Chiao-An Yang,

Ryo Hachiuma,

Sifei Liu,

Subhashree Radhakrishnan,

Raymond A. Yeh,

Yu-Chiang Frank Wang,

Min-Hung Chen; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Yang_2026_CVPR is reused by another entry in this listing; rename before merging into a single .bib file.
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Chiao-An and Hachiuma, Ryo and Liu, Sifei and Radhakrishnan, Subhashree and Yeh, Raymond A. and Wang, Yu-Chiang Frank and Chen, Min-Hung},
  title     = {{4D-RGPT}: Toward Region-level {4D} Understanding via Perceptual Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31042--31053},
}
DARC: Dual Adjustment Reasoning with Counterfactuals for Trustworthy Chest X-ray Classification: Zhifang Liao,

Junhao Li,

HaoKang Ding,

Yucheng Song; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Zhifang and Li, Junhao and Ding, HaoKang and Song, Yucheng},
  title     = {{DARC}: Dual Adjustment Reasoning with Counterfactuals for Trustworthy Chest {X-ray} Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28234--28243},
}
Explaining Object Detectors via Collective Contribution of Pixels: Toshinori Yamauchi,

Hiroshi Kera,

Kazuhiko Kawamoto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yamauchi_2026_CVPR,
  author    = {Yamauchi, Toshinori and Kera, Hiroshi and Kawamoto, Kazuhiko},
  title     = {Explaining Object Detectors via Collective Contribution of Pixels},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17046--17056},
}
CG-Reasoner: Centroid-Guided Positional Reasoning Segmentation for Medical Imaging with a Robust Visual-Text Consistency Metric: Lakshmikar Reddy Polamreddy,

Ming Ma; [pdf] [supp]
[bibtex]
@InProceedings{Polamreddy_2026_CVPR,
  author    = {Polamreddy, Lakshmikar Reddy and Ma, Ming},
  title     = {{CG-Reasoner}: Centroid-Guided Positional Reasoning Segmentation for Medical Imaging with a Robust Visual-Text Consistency Metric},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1472--1481},
}
HUMAPS-4D: A Multimodal Dataset for HUman Motion Analysis with Physiological and Semantic informations: Matthieu Dabrowski,

Ouala Ben Jemaa,

Benjamin Allaert; [pdf] [supp]
[bibtex]
@InProceedings{Dabrowski_2026_CVPR,
  author    = {Dabrowski, Matthieu and Ben Jemaa, Ouala and Allaert, Benjamin},
  title     = {{HUMAPS-4D}: A Multimodal Dataset for {HUman} Motion Analysis with Physiological and Semantic informations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21188--21197},
}
CHIRP dataset: towards long-term, individual-level, behavioral monitoring of bird populations in the wild: Alex Hoi Hang Chan,

Neha Singhal,

Onur Kocahan,

Andrea Meltzer,

Saverio Lubrano,

Miyako H. Warrington,

Michael Griesser,

Fumihiro Kano,

Hemal Naik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chan_2026_CVPR,
  author    = {Chan, Alex Hoi Hang and Singhal, Neha and Kocahan, Onur and Meltzer, Andrea and Lubrano, Saverio and Warrington, Miyako H. and Griesser, Michael and Kano, Fumihiro and Naik, Hemal},
  title     = {{CHIRP} dataset: towards long-term, individual-level, behavioral monitoring of bird populations in the wild},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18428--18439},
}
Learning to Select Visual Tools from Experience: Zeyi Huang,

Yuyang Ji,

Anirudh Sundara Rajan,

Zefan Cai,

Wen Xiao,

Haohan Wang,

Junjie Hu,

Yong Jae Lee; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Huang_2026_CVPR is reused by another entry in this listing; rename before merging into a single .bib file.
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zeyi and Ji, Yuyang and Rajan, Anirudh Sundara and Cai, Zefan and Xiao, Wen and Wang, Haohan and Hu, Junjie and Lee, Yong Jae},
  title     = {Learning to Select Visual Tools from Experience},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4783--4793},
}
MuM: Multi-View Masked Image Modeling for 3D Vision: David Nordström,

Johan Edstedt,

Fredrik Kahl,

Georg Bökman; [pdf] [supp]
[bibtex]
@InProceedings{Nordstrom_2026_CVPR,
  author    = {Nordstr{\"o}m, David and Edstedt, Johan and Kahl, Fredrik and B{\"o}kman, Georg},
  title     = {{MuM}: Multi-View Masked Image Modeling for {3D} Vision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21736--21747},
}
LightSplat: Fast and Memory-Efficient Open-Vocabulary 3D Scene Understanding in Five Seconds: Jaehun Bang,

Jinhyeok Kim,

Minji Kim,

Seungheon Jeong,

Kyungdon Joo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bang_2026_CVPR,
  author    = {Bang, Jaehun and Kim, Jinhyeok and Kim, Minji and Jeong, Seungheon and Joo, Kyungdon},
  title     = {{LightSplat}: Fast and Memory-Efficient Open-Vocabulary {3D} Scene Understanding in Five Seconds},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19812--19821},
}
NEC-Diff: Noise-Robust Event-RAW Complementary Diffusion for Seeing Motion in Extreme Darkness: Haoyue Liu,

Jinghan Xu,

Luxin Feng,

Hanyu Zhou,

Haozhi Zhao,

Yi Chang,

Luxin Yan; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Liu_2026_CVPR is reused by other entries in this listing; rename before merging into a single .bib file.
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Haoyue and Xu, Jinghan and Feng, Luxin and Zhou, Hanyu and Zhao, Haozhi and Chang, Yi and Yan, Luxin},
  title     = {{NEC-Diff}: Noise-Robust Event-{RAW} Complementary Diffusion for Seeing Motion in Extreme Darkness},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22281--22290},
}
UAVLight: A Benchmark for Illumination-Robust 3D Reconstruction in Unmanned Aerial Vehicle (UAV) Scenes: Kang Du,

Xue Liao,

Junpeng Xia,

Chaozheng Guo,

Yi Gu,

Yirui Guan,

Duotun Wang,

Sheng Huang,

Zeyu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Kang and Liao, Xue and Xia, Junpeng and Guo, Chaozheng and Gu, Yi and Guan, Yirui and Wang, Duotun and Huang, Sheng and Wang, Zeyu},
  title     = {{UAVLight}: A Benchmark for Illumination-Robust {3D} Reconstruction in Unmanned Aerial Vehicle ({UAV}) Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5670--5679},
}
GeoSANE: Learning Geospatial Representations from Models, Not Data: Joëlle Hanna,

Damian Falk,

Stella X. Yu,

Damian Borth; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hanna_2026_CVPR,
  author    = {Hanna, Jo{\"e}lle and Falk, Damian and Yu, Stella X. and Borth, Damian},
  title     = {{GeoSANE}: Learning Geospatial Representations from Models, Not Data},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27804--27814},
}
VidTAG: Temporally Aligned Video to GPS Geolocalization with Denoising Sequence Prediction at a Global Scale: Parth Parag Kulkarni,

Rohit Gupta,

Prakash Chandra Chhipa,

Mubarak Shah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kulkarni_2026_CVPR,
  author    = {Kulkarni, Parth Parag and Gupta, Rohit and Chhipa, Prakash Chandra and Shah, Mubarak},
  title     = {{VidTAG}: Temporally Aligned Video to {GPS} Geolocalization with Denoising Sequence Prediction at a Global Scale},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23977--23987},
}
EasyOmnimatte: Taming Pretrained Inpainting Diffusion Models for End-to-End Video Layered Decomposition: Yihan Hu,

Xuelin Chen,

Xiaodong Cun; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Hu_2026_CVPR is reused by another entry in this listing; rename before merging into a single .bib file.
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Yihan and Chen, Xuelin and Cun, Xiaodong},
  title     = {{EasyOmnimatte}: Taming Pretrained Inpainting Diffusion Models for End-to-End Video Layered Decomposition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43341--43351},
}
DemoFunGrasp: Universal Dexterous Functional Grasping via Demonstration-Editing Reinforcement Learning: Chuan Mao,

Haoqi Yuan,

Ziye Huang,

Chaoyi Xu,

Kai Ma,

Zongqing Lu; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Chuan and Yuan, Haoqi and Huang, Ziye and Xu, Chaoyi and Ma, Kai and Lu, Zongqing},
  title     = {{DemoFunGrasp}: Universal Dexterous Functional Grasping via Demonstration-Editing Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {986--995},
}
Interactive Episodic Memory with User Feedback: Nikesh Subedi,

Loris Bazzani,

Ziad Al-Halah; [pdf] [supp]
[bibtex]
@InProceedings{Subedi_2026_CVPR, author = {Subedi, Nikesh and Bazzani, Loris and Al-Halah, Ziad}, title = {Interactive Episodic Memory with User Feedback}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38826-38835} }
DABO: Difficulty-Aware Bayesian Optimization with Diffusion-Learned Priors: Mengyang Li,

Pinlong Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Mengyang and Zhao, Pinlong}, title = {DABO: Difficulty-Aware Bayesian Optimization with Diffusion-Learned Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6126-6135} }
PolySLGen: Online Multimodal Speaking-Listening Reaction Generation in Polyadic Interaction: Zhi-Yi Lin,

Thomas Markhorst,

Jouh Yeong Chew,

Xucong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR, author = {Lin, Zhi-Yi and Markhorst, Thomas and Chew, Jouh Yeong and Zhang, Xucong}, title = {PolySLGen: Online Multimodal Speaking-Listening Reaction Generation in Polyadic Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29379-29390} }
Bypassing the Transport Plan: Dynamic Reweighting for Out-of-Distribution Detection with Optimal Transport: Yang Xiao,

Weiming Liu,

Jun Dan,

Tengyue Xu,

Fan Wang,

Hua Yu,

Junhao Dong,

Jiao Liu,

Shunjie Dong,

Lianyong Qi; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2026_CVPR, author = {Xiao, Yang and Liu, Weiming and Dan, Jun and Xu, Tengyue and Wang, Fan and Yu, Hua and Dong, Junhao and Liu, Jiao and Dong, Shunjie and Qi, Lianyong}, title = {Bypassing the Transport Plan: Dynamic Reweighting for Out-of-Distribution Detection with Optimal Transport}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32026-32036} }
MARCO: Navigating the Unseen Space of Semantic Correspondence: Claudia Cuttano,

Gabriele Trivigno,

Carlo Masone,

Stefan Roth; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cuttano_2026_CVPR, author = {Cuttano, Claudia and Trivigno, Gabriele and Masone, Carlo and Roth, Stefan}, title = {MARCO: Navigating the Unseen Space of Semantic Correspondence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21649-21658} }
Realiz3D: 3D Generation Made Photorealistic via Domain-Aware Learning: Ido Sobol,

Kihyuk Sohn,

Yoav Blum,

Egor Zakharov,

Max Bluvstein,

Andrea Vedaldi,

Or Litany; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sobol_2026_CVPR, author = {Sobol, Ido and Sohn, Kihyuk and Blum, Yoav and Zakharov, Egor and Bluvstein, Max and Vedaldi, Andrea and Litany, Or}, title = {Realiz3D: 3D Generation Made Photorealistic via Domain-Aware Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27072-27081} }
Brewing Stronger Features: Dual-Teacher Distillation for Multispectral Earth Observation: Filip Wolf,

Blaž Rolih,

Luka Čehovin Zajc; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wolf_2026_CVPR, author = {Wolf, Filip and Rolih, Bla\v{z} and Zajc, Luka \v{C}ehovin}, title = {Brewing Stronger Features: Dual-Teacher Distillation for Multispectral Earth Observation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27815-27826} }
Differentiable Vector Quantization for Rate-Distortion Optimization of Generative Image Compression: Shiyin Jiang,

Wei Long,

Minghao Han,

Zhenghao Chen,

Ce Zhu,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR, author = {Jiang, Shiyin and Long, Wei and Han, Minghao and Chen, Zhenghao and Zhu, Ce and Gu, Shuhang}, title = {Differentiable Vector Quantization for Rate-Distortion Optimization of Generative Image Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14440-14450} }
Advancing Image Classification with Discrete Diffusion Classification Modeling: Omer Belhasin,

Shelly Golan,

Ran El-Yaniv,

Michael Elad; [pdf] [supp]
[bibtex]
@InProceedings{Belhasin_2026_CVPR, author = {Belhasin, Omer and Golan, Shelly and El-Yaniv, Ran and Elad, Michael}, title = {Advancing Image Classification with Discrete Diffusion Classification Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {124-134} }
CoopDiff: A Diffusion-Guided Approach for Cooperation under Corruptions: Gong Chen,

Chaokun Zhang,

Pengcheng Lv; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Gong and Zhang, Chaokun and Lv, Pengcheng}, title = {CoopDiff: A Diffusion-Guided Approach for Cooperation under Corruptions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11546-11555} }
Spatial-Spectral Residuals Informed Diffusion Neural Operator for Pan-sharpening: Jiahan Huang,

Ran Ran,

Junming Hou,

Zihao Chen,

Xiaofeng Cong,

Junling Li,

Liang-Jian Deng; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR, author = {Huang, Jiahan and Ran, Ran and Hou, Junming and Chen, Zihao and Cong, Xiaofeng and Li, Junling and Deng, Liang-Jian}, title = {Spatial-Spectral Residuals Informed Diffusion Neural Operator for Pan-sharpening}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23642-23651} }
AntiStyler: Defending Object Detection Models Against Adversarial Patch Attacks Using Style Removal: Idan Yankelev,

Edita Grolman,

Yarin Yerushalmi Levi,

Amit Giloni,

Omer Hofman,

Toshiya Shimizu,

Yuval Elovici,

Asaf Shabtai; [pdf] [supp]
[bibtex]
@InProceedings{Yankelev_2026_CVPR, author = {Yankelev, Idan and Grolman, Edita and Levi, Yarin Yerushalmi and Giloni, Amit and Hofman, Omer and Shimizu, Toshiya and Elovici, Yuval and Shabtai, Asaf}, title = {AntiStyler: Defending Object Detection Models Against Adversarial Patch Attacks Using Style Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27936-27945} }
RecoverMark: Robust Watermarking for Localization and Recovery of Manipulated Faces: Haonan An,

Xiaohui Ye,

Guang Hua,

Yihang Tao,

Hangcheng Cao,

Xiangyu Yu,

Yuguang Fang; [pdf] [arXiv]
[bibtex]
@InProceedings{An_2026_CVPR, author = {An, Haonan and Ye, Xiaohui and Hua, Guang and Tao, Yihang and Cao, Hangcheng and Yu, Xiangyu and Fang, Yuguang}, title = {RecoverMark: Robust Watermarking for Localization and Recovery of Manipulated Faces}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8587-8597} }
EMGauss: Continuous Slice-to-3D Reconstruction via Dynamic Gaussian Modeling in Volume Electron Microscopy: Yumeng He,

Zanwei Zhou,

Yekun Zheng,

Chen Liang,

Yunbo Wang,

Xiaokang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR, author = {He, Yumeng and Zhou, Zanwei and Zheng, Yekun and Liang, Chen and Wang, Yunbo and Yang, Xiaokang}, title = {EMGauss: Continuous Slice-to-3D Reconstruction via Dynamic Gaussian Modeling in Volume Electron Microscopy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15606-15615} }
A Stitch in Time: Learning Procedural Workflow via Self-Supervised Plackett-Luce Ranking: Chengan Che,

Chao Wang,

Xinyue Chen,

Sophia Tsoka,

Luis C. Garcia-Peraza-Herrera; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Che_2026_CVPR, author = {Che, Chengan and Wang, Chao and Chen, Xinyue and Tsoka, Sophia and Garcia-Peraza-Herrera, Luis C.}, title = {A Stitch in Time: Learning Procedural Workflow via Self-Supervised Plackett-Luce Ranking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17000-17010} }
ConeSep: Cone-based Robust Noise-Unlearning Compositional Network for Composed Image Retrieval: Zixu Li,

Yupeng Hu,

Zhiwei Chen,

Mingyu Zhang,

Zhiheng Fu,

Liqiang Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Zixu and Hu, Yupeng and Chen, Zhiwei and Zhang, Mingyu and Fu, Zhiheng and Nie, Liqiang}, title = {ConeSep: Cone-based Robust Noise-Unlearning Compositional Network for Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16897-16909} }
TDATR: Improving End-to-End Table Recognition via Table Detail-Aware Learning and Cell-Level Visual Alignment: Chunxia Qin,

Chenyu Liu,

Pengcheng Xia,

Jun Du,

Baocai Yin,

Bing Yin,

Cong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2026_CVPR, author = {Qin, Chunxia and Liu, Chenyu and Xia, Pengcheng and Du, Jun and Yin, Baocai and Yin, Bing and Liu, Cong}, title = {TDATR: Improving End-to-End Table Recognition via Table Detail-Aware Learning and Cell-Level Visual Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36838-36849} }
Foundry: Distilling 3D Foundation Models for the Edge: Guillaume Letellier,

Siddharth Srivastava,

Frederic Jurie,

Gaurav Sharma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Letellier_2026_CVPR, author = {Letellier, Guillaume and Srivastava, Siddharth and Jurie, Frederic and Sharma, Gaurav}, title = {Foundry: Distilling 3D Foundation Models for the Edge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17194-17203} }
Seeing Through Blur: Tackling Defocus in Spike-Based Imaging: Xiantao Ma,

Siwei Dong,

Lin Zhu,

Lizhi Wang,

Hua Huang; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR, author = {Ma, Xiantao and Dong, Siwei and Zhu, Lin and Wang, Lizhi and Huang, Hua}, title = {Seeing Through Blur: Tackling Defocus in Spike-Based Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19676-19685} }
Generalized and Personalized Federated Learning with Black-Box Foundation Models via Orthogonal Transformations: Eun Gyung Kong,

Jewon Yeom,

Yonghoon Jeon,

Taesup Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2026_CVPR, author = {Kong, Eun Gyung and Yeom, Jewon and Jeon, Yonghoon and Kim, Taesup}, title = {Generalized and Personalized Federated Learning with Black-Box Foundation Models via Orthogonal Transformations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24567-24576} }
TopoCL: Topological Contrastive Learning for Medical Imaging: Guangyu Meng,

Pengfei Gu,

Peixian Liang,

John P. Lalor,

Erin Wolf Chambers,

Danny Z. Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meng_2026_CVPR, author = {Meng, Guangyu and Gu, Pengfei and Liang, Peixian and Lalor, John P. and Chambers, Erin Wolf and Chen, Danny Z.}, title = {TopoCL: Topological Contrastive Learning for Medical Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42681-42690} }
Scaling Zero-Shot Reference-to-Video Generation: Zijian Zhou,

Shikun Liu,

Haozhe Liu,

Haonan Qiu,

Zhaochong An,

Weiming Ren,

Zhiheng Liu,

Xiaoke Huang,

Kam-Woh Ng,

Tian Xie,

Xiao Han,

Yuren Cong,

Hang Li,

Chuyan Zhu,

Aditya Patel,

Tao Xiang,

Sen He; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR, author = {Zhou, Zijian and Liu, Shikun and Liu, Haozhe and Qiu, Haonan and An, Zhaochong and Ren, Weiming and Liu, Zhiheng and Huang, Xiaoke and Ng, Kam-Woh and Xie, Tian and Han, Xiao and Cong, Yuren and Li, Hang and Zhu, Chuyan and Patel, Aditya and Xiang, Tao and He, Sen}, title = {Scaling Zero-Shot Reference-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9253-9262} }
MAPS: Preserving Vision-Language Representations via Module-Wise Proximity Scheduling for Better Vision-Language-Action Generalization: Chengyue Huang,

Mellon M. Zhang,

Robert Azarcon,

Glen Chou,

Zsolt Kira; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR, author = {Huang, Chengyue and Zhang, Mellon M. and Azarcon, Robert and Chou, Glen and Kira, Zsolt}, title = {MAPS: Preserving Vision-Language Representations via Module-Wise Proximity Scheduling for Better Vision-Language-Action Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32451-32462} }
$\oslash$ Source Models Leak What They Shouldn't $\nrightarrow$: Unlearning Zero-Shot Transfer in Domain Adaptation Through Adversarial Optimization: Arnav Devalapally,

Poornima Jain,

Kartik Srinivas,

Vineeth N. Balasubramanian; [pdf] [supp]
[bibtex]
@InProceedings{Devalapally_2026_CVPR, author = {Devalapally, Arnav and Jain, Poornima and Srinivas, Kartik and Balasubramanian, Vineeth N.}, title = {{$\oslash$} Source Models Leak What They Shouldn't {$\nrightarrow$}: Unlearning Zero-Shot Transfer in Domain Adaptation Through Adversarial Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1543-1553} }
Spectral-Geometric Neural Fields for Pose-Free LiDAR View Synthesis: Yinuo Jiang,

Jun Cheng,

Yiran Wang,

Cheng Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR, author = {Jiang, Yinuo and Cheng, Jun and Wang, Yiran and Cheng, Cheng}, title = {Spectral-Geometric Neural Fields for Pose-Free LiDAR View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2993-3003} }
SVAgent: Storyline-guided Long Video Understanding via Cross-Modal Multi-Agent Collaboration: Zhongyu Yang,

Zuhao Yang,

Shuo Zhan,

Tan Yue,

Wei Pang,

Yingfang Yuan; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Zhongyu and Yang, Zuhao and Zhan, Shuo and Yue, Tan and Pang, Wei and Yuan, Yingfang}, title = {SVAgent: Storyline-guided Long Video Understanding via Cross-Modal Multi-Agent Collaboration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24062-24072} }
VDOT: Efficient Unified Video Creation via Optimal Transport Distillation: Yutong Wang,

Haiyu Zhang,

Tianfan Xue,

Yu Qiao,

Yaohui Wang,

Chang Xu,

Xinyuan Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Yutong and Zhang, Haiyu and Xue, Tianfan and Qiao, Yu and Wang, Yaohui and Xu, Chang and Chen, Xinyuan}, title = {VDOT: Efficient Unified Video Creation via Optimal Transport Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9273-9283} }
Dynamic Exposure Burst Image Restoration: Woohyeok Kim,

Jaesung Rim,

Daeyeon Kim,

Sunghyun Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Woohyeok and Rim, Jaesung and Kim, Daeyeon and Cho, Sunghyun}, title = {Dynamic Exposure Burst Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15550-15560} }
TRivia: Self-supervised Fine-tuning of Vision-Language Models for Table Recognition: Junyuan Zhang,

Bin Wang,

Qintong Zhang,

Fan Wu,

Zichen Wen,

Jialin Lu,

Junjie Shan,

Ziqi Zhao,

Shuya Yang,

Ziling Wang,

Ziyang Miao,

Huaping Zhong,

Yuhang Zang,

Xiaoyi Dong,

Ka-Ho Chow,

Conghui He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Junyuan and Wang, Bin and Zhang, Qintong and Wu, Fan and Wen, Zichen and Lu, Jialin and Shan, Junjie and Zhao, Ziqi and Yang, Shuya and Wang, Ziling and Miao, Ziyang and Zhong, Huaping and Zang, Yuhang and Dong, Xiaoyi and Chow, Ka-Ho and He, Conghui}, title = {TRivia: Self-supervised Fine-tuning of Vision-Language Models for Table Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33196-33206} }
Hyperbolic Relational Prompts for Intersectional Fairness in Medical VLMs: Jiayu Qian,

Zongxian Yang,

Guanxing Chen,

Pengwei Hu,

KC Tan,

Yan Wang,

Yu-An Huang,

Zhi-An Huang; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2026_CVPR, author = {Qian, Jiayu and Yang, Zongxian and Chen, Guanxing and Hu, Pengwei and Tan, KC and Wang, Yan and Huang, Yu-An and Huang, Zhi-An}, title = {Hyperbolic Relational Prompts for Intersectional Fairness in Medical VLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13712-13721} }
AudioStory: Generating Long-Form Narrative Audio with Large Language Models: Yuxin Guo,

Teng Wang,

Yuying Ge,

Shijie Ma,

Yixiao Ge,

Wei Zou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Yuxin and Wang, Teng and Ge, Yuying and Ma, Shijie and Ge, Yixiao and Zou, Wei}, title = {AudioStory: Generating Long-Form Narrative Audio with Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37735-37744} }
Perception Characteristics Distance: Measuring Stability and Robustness of Perception System in Dynamic Conditions under a Certain Decision Rule: Boyu Jiang,

Liang Shi,

Zhengzhi Lin,

Lanxin Xiang,

Loren Stowe,

Feng Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR, author = {Jiang, Boyu and Shi, Liang and Lin, Zhengzhi and Xiang, Lanxin and Stowe, Loren and Guo, Feng}, title = {Perception Characteristics Distance: Measuring Stability and Robustness of Perception System in Dynamic Conditions under a Certain Decision Rule}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4829-4838} }
DialogueVPR: Towards Conversational Visual Place Recognition: Yukun Song,

Changwei Wang,

Xingtian Pei,

Shibiao Xu,

Wenhao Xu,

Shunpeng Chen,

Yu Zhang,

Ke Zhang,

Rongtao Xu,

Xuxiang Feng,

Pengyang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Song_2026_CVPR, author = {Song, Yukun and Wang, Changwei and Pei, Xingtian and Xu, Shibiao and Xu, Wenhao and Chen, Shunpeng and Zhang, Yu and Zhang, Ke and Xu, Rongtao and Feng, Xuxiang and Wang, Pengyang}, title = {DialogueVPR: Towards Conversational Visual Place Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41100-41110} }
RC-NF: Robot-Conditioned Normalizing Flow for Real-Time Anomaly Detection in Robotic Manipulation: Shijie Zhou,

Bin Zhu,

Jiarui Yang,

Xiangyu Zhao,

Jingjing Chen,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR, author = {Zhou, Shijie and Zhu, Bin and Yang, Jiarui and Zhao, Xiangyu and Chen, Jingjing and Jiang, Yu-Gang}, title = {RC-NF: Robot-Conditioned Normalizing Flow for Real-Time Anomaly Detection in Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43050-43060} }
ClipGStream: Clip-Stream Gaussian Splatting for Any Length and Any Motion Multi-View Dynamic Scene Reconstruction: Jie Liang,

Jiahao Wu,

Chao Wang,

Jiayu Yang,

Xiaoyun Zheng,

Kaiqiang Xiong,

Zhanke Wang,

Jinbo Yan,

Feng Gao,

Ronggang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR, author = {Liang, Jie and Wu, Jiahao and Wang, Chao and Yang, Jiayu and Zheng, Xiaoyun and Xiong, Kaiqiang and Wang, Zhanke and Yan, Jinbo and Gao, Feng and Wang, Ronggang}, title = {ClipGStream: Clip-Stream Gaussian Splatting for Any Length and Any Motion Multi-View Dynamic Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41022-41032} }
RelightAnyone: A Generalized Relightable 3D Gaussian Head Model: Yingyan Xu,

Pramod Rao,

Sebastian Weiss,

Gaspard Zoss,

Markus Gross,

Christian Theobalt,

Marc Habermann,

Derek Bradley; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Yingyan and Rao, Pramod and Weiss, Sebastian and Zoss, Gaspard and Gross, Markus and Theobalt, Christian and Habermann, Marc and Bradley, Derek}, title = {RelightAnyone: A Generalized Relightable 3D Gaussian Head Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25258-25269} }
UniCorrn: Unified Correspondence Transformer Across 2D and 3D: Prajnan Goswami,

Tianye Ding,

Feng Liu,

Huaizu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Goswami_2026_CVPR, author = {Goswami, Prajnan and Ding, Tianye and Liu, Feng and Jiang, Huaizu}, title = {UniCorrn: Unified Correspondence Transformer Across 2D and 3D}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9943-9954} }
GIFT: Global Irreplaceability Frame Targeting for Efficient Video Understanding: Junpeng Ma,

Sashuai Zhou,

Guanghao Li,

Xin Gao,

Yue Cao,

Hengyu Zeng,

Yuxiang Yan,

Zhibin Wang,

Jun Song,

Bo Zheng,

Shanghang Zhang,

Jian Pu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR, author = {Ma, Junpeng and Zhou, Sashuai and Li, Guanghao and Gao, Xin and Cao, Yue and Zeng, Hengyu and Yan, Yuxiang and Wang, Zhibin and Song, Jun and Zheng, Bo and Zhang, Shanghang and Pu, Jian}, title = {GIFT: Global Irreplaceability Frame Targeting for Efficient Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25610-25620} }
FEAT: Fashion Editing and Try-On from Any Design: Soye Kwon,

Keonyoung Lee,

Dahuin Jung,

Jaekoo Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kwon_2026_CVPR, author = {Kwon, Soye and Lee, Keonyoung and Jung, Dahuin and Lee, Jaekoo}, title = {FEAT: Fashion Editing and Try-On from Any Design}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22080-22089} }
TopoMesh: High-Fidelity Mesh Autoencoding via Topological Unification: Guan Luo,

Xiu Li,

Rui Chen,

Xuanyu Yi,

Jing Lin,

Chia Hao Chen,

Jiahang Liu,

Song-Hai Zhang,

Jianfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR, author = {Luo, Guan and Li, Xiu and Chen, Rui and Yi, Xuanyu and Lin, Jing and Chen, Chia Hao and Liu, Jiahang and Zhang, Song-Hai and Zhang, Jianfeng}, title = {TopoMesh: High-Fidelity Mesh Autoencoding via Topological Unification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27082-27092} }
TALO: Pushing 3D Vision Foundation Models Towards Globally Consistent Online Reconstruction: Fengyi Zhang,

Tianjun Zhang,

Kasra Khosoussi,

Zheng Zhang,

Zi Huang,

Yadan Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Fengyi and Zhang, Tianjun and Khosoussi, Kasra and Zhang, Zheng and Huang, Zi and Luo, Yadan}, title = {TALO: Pushing 3D Vision Foundation Models Towards Globally Consistent Online Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21870-21879} }
Why Not Hyperparameter-Friendly Optimisation? A Monotonic Adaptive Norm Rescaling Approach For Long-Tailed Recognition: Shuo Zhang,

Chenqi Li,

Tingting Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shuo and Li, Chenqi and Zhu, Tingting}, title = {Why Not Hyperparameter-Friendly Optimisation? A Monotonic Adaptive Norm Rescaling Approach For Long-Tailed Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36947-36956} }
Multi-view Pyramid Transformer: Look Coarser to See Broader: Gyeongjin Kang,

Seungkwon Yang,

Seungtae Nam,

Younggeun Lee,

Jungwoo Kim,

Eunbyung Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR, author = {Kang, Gyeongjin and Yang, Seungkwon and Nam, Seungtae and Lee, Younggeun and Kim, Jungwoo and Park, Eunbyung}, title = {Multi-view Pyramid Transformer: Look Coarser to See Broader}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37380-37390} }
EasyV2V: A High-quality Instruction-based Video Editing Framework: Jinjie Mai,

Chaoyang Wang,

Gordon Guocheng Qian,

Willi Menapace,

Sergey Tulyakov,

Bernard Ghanem,

Peter Wonka,

Ashkan Mirzaei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mai_2026_CVPR, author = {Mai, Jinjie and Wang, Chaoyang and Qian, Gordon Guocheng and Menapace, Willi and Tulyakov, Sergey and Ghanem, Bernard and Wonka, Peter and Mirzaei, Ashkan}, title = {EasyV2V: A High-quality Instruction-based Video Editing Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30435-30445} }
Open-Ended Instruction Realization with LLM-Enabled Multi-Planner Scheduling in Autonomous Vehicles: Jiawei Liu,

Xun Gong,

Fen Fang,

Muli Yang,

Bohao Qu,

Yunfeng Hu,

Hong Chen,

Xulei Yang,

Qing Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Jiawei and Gong, Xun and Fang, Fen and Yang, Muli and Qu, Bohao and Hu, Yunfeng and Chen, Hong and Yang, Xulei and Guo, Qing}, title = {Open-Ended Instruction Realization with LLM-Enabled Multi-Planner Scheduling in Autonomous Vehicles}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32070-32081} }
VIMCAN: Visual-Inertial 3D Human Pose Estimation with Hybrid Mamba-Cross-Attention Network: Zepeng Yang,

Junxuan Bai,

Hao Li,

Ju Dai,

Junjun Pan,

Yongfeng Yin,

Bin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Zepeng and Bai, Junxuan and Li, Hao and Dai, Ju and Pan, Junjun and Yin, Yongfeng and Li, Bin}, title = {VIMCAN: Visual-Inertial 3D Human Pose Estimation with Hybrid Mamba-Cross-Attention Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28458-28467} }
SGS-Intrinsic: Semantic-Invariant Gaussian Splatting for Sparse-View Indoor Inverse Rendering: Jiahao Niu,

Rongjia Zheng,

Wenju Xu,

Wei-Shi Zheng,

Qing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Niu_2026_CVPR, author = {Niu, Jiahao and Zheng, Rongjia and Xu, Wenju and Zheng, Wei-Shi and Zhang, Qing}, title = {SGS-Intrinsic: Semantic-Invariant Gaussian Splatting for Sparse-View Indoor Inverse Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26021-26030} }
COPYLENS: Towards Copyrighted Characters Infringement Detection via Copyright-Aware Prompt Learning: Yaoyu Jin,

Xiaochun Yang,

Hong Liu,

Leixia Wang,

Jian Li,

Rui Ding,

Bin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2026_CVPR, author = {Jin, Yaoyu and Yang, Xiaochun and Liu, Hong and Wang, Leixia and Li, Jian and Ding, Rui and Wang, Bin}, title = {COPYLENS: Towards Copyrighted Characters Infringement Detection via Copyright-Aware Prompt Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24492-24502} }
OralGPT-Plus: Learning to Use Visual Tools via Reinforcement Learning for Panoramic X-ray Analysis: Yuxuan Fan,

Jing Hao,

Hong Chen,

Jiahao Bao,

Yihua Shao,

Yuci Liang,

Kuo Feng Hung,

Hao Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR, author = {Fan, Yuxuan and Hao, Jing and Chen, Hong and Bao, Jiahao and Shao, Yihua and Liang, Yuci and Hung, Kuo Feng and Tang, Hao}, title = {OralGPT-Plus: Learning to Use Visual Tools via Reinforcement Learning for Panoramic X-ray Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35373-35383} }
LS-ViT: Least-Squares Hessian Based Block Reconstruction for Low-Bit Post-Training Quantization of Vision Transformers: Hyunha Hwang,

Xuan Truong Nguyen,

Hyuk-Jae Lee; [pdf] [supp]
[bibtex]
@InProceedings{Hwang_2026_CVPR, author = {Hwang, Hyunha and Nguyen, Xuan Truong and Lee, Hyuk-Jae}, title = {LS-ViT: Least-Squares Hessian Based Block Reconstruction for Low-Bit Post-Training Quantization of Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33588-33597} }
CURE: Curriculum-guided Multi-task Training for Reliable Anatomy Grounded Report Generation: Pablo Messina,

Andrés Villa,

Juan Leon Alcazar,

Karen Sanchez,

Carlos Hinojosa,

Denis Parra,

Alvaro Soto,

Bernard Ghanem; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Messina_2026_CVPR, author = {Messina, Pablo and Villa, Andr\'es and Alcazar, Juan Leon and Sanchez, Karen and Hinojosa, Carlos and Parra, Denis and Soto, Alvaro and Ghanem, Bernard}, title = {CURE: Curriculum-guided Multi-task Training for Reliable Anatomy Grounded Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36279-36289} }
Text-Phase Synergy Network with Dual Priors for Unsupervised Cross-Domain Image Retrieval: Jing Yang,

Hui Xue,

Shipeng Zhu,

Pengfei Fang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Jing and Xue, Hui and Zhu, Shipeng and Fang, Pengfei}, title = {Text-Phase Synergy Network with Dual Priors for Unsupervised Cross-Domain Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23891-23900} }
DrivePI: Spatial-aware 4D MLLM for Unified Autonomous Driving Understanding, Perception, Prediction and Planning: Zhe Liu,

Runhui Huang,

Rui Yang,

Siming Yan,

Zining Wang,

Lu Hou,

Di Lin,

Xiang Bai,

Hengshuang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Zhe and Huang, Runhui and Yang, Rui and Yan, Siming and Wang, Zining and Hou, Lu and Lin, Di and Bai, Xiang and Zhao, Hengshuang}, title = {DrivePI: Spatial-aware 4D MLLM for Unified Autonomous Driving Understanding, Perception, Prediction and Planning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3688-3698} }
Learning complete and explainable visual representations from itemized text supervision: Yiwei Lyu,

Chenhui Zhao,

Soumyanil Banerjee,

Shixuan Liu,

Akshay Rao,

Akhil Kondepudi,

Honglak Lee,

Todd C. Hollon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lyu_2026_CVPR, author = {Lyu, Yiwei and Zhao, Chenhui and Banerjee, Soumyanil and Liu, Shixuan and Rao, Akshay and Kondepudi, Akhil and Lee, Honglak and Hollon, Todd C.}, title = {Learning complete and explainable visual representations from itemized text supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21110-21120} }
Charge: A Comprehensive Novel View Synthesis Benchmark and Dataset to Bind Them All: Michal Nazarczuk,

Thomas Tanay,

Arthur Moreau,

Zhensong Zhang,

Eduardo Pérez-Pellitero; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nazarczuk_2026_CVPR, author = {Nazarczuk, Michal and Tanay, Thomas and Moreau, Arthur and Zhang, Zhensong and P{\'e}rez-Pellitero, Eduardo}, title = {Charge: A Comprehensive Novel View Synthesis Benchmark and Dataset to Bind Them All}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15323-15333} }
Your Latent Mask is Wrong: Pixel-Equivalent Latent Compositing for Diffusion Models: Rowan Bradbury,

Dazhi Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bradbury_2026_CVPR, author = {Bradbury, Rowan and Zhong, Dazhi}, title = {Your Latent Mask is Wrong: Pixel-Equivalent Latent Compositing for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18630-18639} }
NAF: Zero-Shot Feature Upsampling via Neighborhood Attention Filtering: Loïck Chambon,

Paul Couairon,

Éloi Zablocki,

Alexandre Boulch,

Nicolas Thome,

Matthieu Cord; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chambon_2026_CVPR, author = {Chambon, Lo{\"\i}ck and Couairon, Paul and Zablocki, {\'E}loi and Boulch, Alexandre and Thome, Nicolas and Cord, Matthieu}, title = {NAF: Zero-Shot Feature Upsampling via Neighborhood Attention Filtering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26604-26613} }
WOD-E2E: Waymo Open Dataset for End-to-End Driving in Challenging Long-tail Scenarios: Runsheng Xu,

Hubert Lin,

Wonseok Jeon,

Hao Feng,

Yuliang Zou,

Liting Sun,

John Gorman,

Kate Tolstaya,

Sarah Tang,

Brandyn White,

Ben Sapp,

Mingxing Tan,

Jyh-Jing Hwang,

Dragomir Anguelov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Runsheng and Lin, Hubert and Jeon, Wonseok and Feng, Hao and Zou, Yuliang and Sun, Liting and Gorman, John and Tolstaya, Kate and Tang, Sarah and White, Brandyn and Sapp, Ben and Tan, Mingxing and Hwang, Jyh-Jing and Anguelov, Dragomir}, title = {WOD-E2E: Waymo Open Dataset for End-to-End Driving in Challenging Long-tail Scenarios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3709-3718} }
NoRD: A Data-Efficient Vision-Language-Action Model that Drives without Reasoning: Ishaan Rawal,

Shubh Gupta,

Yihan Hu,

Wei Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rawal_2026_CVPR, author = {Rawal, Ishaan and Gupta, Shubh and Hu, Yihan and Zhan, Wei}, title = {NoRD: A Data-Efficient Vision-Language-Action Model that Drives without Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10965-10975} }
Personalized Longitudinal Medical Report Generation via Temporally-Aware Federated Adaptation: He Zhu,

Ren Togo,

Takahiro Ogawa,

Kenji Hirata,

Minghui Tang,

Takaaki Yoshimura,

Hiroyuki Sugimori,

Noriko Nishioka,

Yukie Shimizu,

Kohsuke Kudo,

Miki Haseyama; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR, author = {Zhu, He and Togo, Ren and Ogawa, Takahiro and Hirata, Kenji and Tang, Minghui and Yoshimura, Takaaki and Sugimori, Hiroyuki and Nishioka, Noriko and Shimizu, Yukie and Kudo, Kohsuke and Haseyama, Miki}, title = {Personalized Longitudinal Medical Report Generation via Temporally-Aware Federated Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42701-42710} }
AR2-4FV: Anchored Referring and Re-identification for Long-Term Grounding in Fixed-View Videos: Teng Yan,

Yihan Liu,

Jiongxu Chen,

Teng Wang,

Jiaqi Li,

Bingzhuo Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR, author = {Yan, Teng and Liu, Yihan and Chen, Jiongxu and Wang, Teng and Li, Jiaqi and Zhong, Bingzhuo}, title = {AR2-4FV: Anchored Referring and Re-identification for Long-Term Grounding in Fixed-View Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17568-17577} }
Sparse-View Localization via Online Neural 3D Regression: Ludvig Dillén,

Magnus Oskarsson,

Viktor Larsson; [pdf] [supp]
[bibtex]
@InProceedings{Dillen_2026_CVPR, author = {Dill{\'e}n, Ludvig and Oskarsson, Magnus and Larsson, Viktor}, title = {Sparse-View Localization via Online Neural 3D Regression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21794-21804} }
Free-Grained Hierarchical Visual Recognition: Seulki Park,

Zilin Wang,

Stella X. Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR, author = {Park, Seulki and Wang, Zilin and Yu, Stella X.}, title = {Free-Grained Hierarchical Visual Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32767-32776} }
KV-Tracker: Real-Time Pose Tracking with Transformers: Marwan Taher,

Ignacio Alzugaray,

Kirill Mazur,

Xin Kong,

Andrew Davison; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Taher_2026_CVPR, author = {Taher, Marwan and Alzugaray, Ignacio and Mazur, Kirill and Kong, Xin and Davison, Andrew}, title = {KV-Tracker: Real-Time Pose Tracking with Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28990-28999} }
PhysGen: Physically Grounded 3D Shape Generation for Industrial Design: Yingxuan You,

Chen Zhao,

Hantao Zhang,

Ming Xu,

Pascal Fua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2026_CVPR, author = {You, Yingxuan and Zhao, Chen and Zhang, Hantao and Xu, Ming and Fua, Pascal}, title = {PhysGen: Physically Grounded 3D Shape Generation for Industrial Design}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27209-27218} }
Diffusion MRI Transformer with a Diffusion Space Rotary Positional Embedding (D-RoPE): Gustavo Chau Loo Kung,

Mohammad Abbasi,

Camila Blank,

Juze Zhang,

Alan Q. Wang,

Sophie Ostmeier,

Akshay Chaudhari,

Kilian Pohl,

Ehsan Adeli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kung_2026_CVPR, author = {Kung, Gustavo Chau Loo and Abbasi, Mohammad and Blank, Camila and Zhang, Juze and Wang, Alan Q. and Ostmeier, Sophie and Chaudhari, Akshay and Pohl, Kilian and Adeli, Ehsan}, title = {Diffusion MRI Transformer with a Diffusion Space Rotary Positional Embedding (D-RoPE)}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38430-38441} }
OraPO: Oracle-educated Reinforcement Learning for Data-efficient and Factual Radiology Report Generation: Zhuoxiao Chen,

Hongyang Yu,

Ying Xu,

Yadan Luo,

Long Duong,

Yuan-Fang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Zhuoxiao and Yu, Hongyang and Xu, Ying and Luo, Yadan and Duong, Long and Li, Yuan-Fang}, title = {OraPO: Oracle-educated Reinforcement Learning for Data-efficient and Factual Radiology Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28275-28287} }
PA-Attack: Guiding Gray-Box Attacks on LVLM Vision Encoders with Prototypes and Attention: Hefei Mei,

Zirui Wang,

Chang Xu,

Jianyuan Guo,

Minjing Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mei_2026_CVPR, author = {Mei, Hefei and Wang, Zirui and Xu, Chang and Guo, Jianyuan and Dong, Minjing}, title = {PA-Attack: Guiding Gray-Box Attacks on LVLM Vision Encoders with Prototypes and Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15679-15688} }
Variation-aware Vision Token Dropping for Faster Large Vision-Language Models: Junjie Chen,

Xuyang Liu,

Zichen Wen,

Yiyu Wang,

Siteng Huang,

Honggang Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Junjie and Liu, Xuyang and Wen, Zichen and Wang, Yiyu and Huang, Siteng and Chen, Honggang}, title = {Variation-aware Vision Token Dropping for Faster Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3489-3499} }
M4V: Multimodal Mamba for Efficient Text-to-Video Generation: Jiancheng Huang,

Gengwei Zhang,

Zequn Jie,

Siyu Jiao,

Yinlong Qian,

Ling Chen,

Yunchao Wei,

Lin Ma; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR, author = {Huang, Jiancheng and Zhang, Gengwei and Jie, Zequn and Jiao, Siyu and Qian, Yinlong and Chen, Ling and Wei, Yunchao and Ma, Lin}, title = {M4V: Multimodal Mamba for Efficient Text-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36747-36757} }
TableMix: Enhancing Multimodal Table Reasoning in MLLMs from a Data-Centric Perspective: Chaohu Liu,

Shida Wang,

Yubo Wang,

Linli Xu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Chaohu and Wang, Shida and Wang, Yubo and Xu, Linli}, title = {TableMix: Enhancing Multimodal Table Reasoning in MLLMs from a Data-Centric Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33553-33565} }
Widget2Code: From Visual Widgets to UI Code via Multimodal LLMs: Houston H. Zhang,

Tao Zhang,

Baoze Lin,

Yuanqi Xue,

Yincheng Zhu,

Huan Liu,

Li Gu,

Linfeng Ye,

Ziqiang Wang,

Xinxin Zuo,

Yang Wang,

Yuanhao Yu,

Zhixiang Chi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Houston H. and Zhang, Tao and Lin, Baoze and Xue, Yuanqi and Zhu, Yincheng and Liu, Huan and Gu, Li and Ye, Linfeng and Wang, Ziqiang and Zuo, Xinxin and Wang, Yang and Yu, Yuanhao and Chi, Zhixiang}, title = {Widget2Code: From Visual Widgets to UI Code via Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20293-20302} }
IntrinsicWeather: Controllable Weather Editing in Intrinsic Space: Yixin Zhu,

Zuo-Liang Zhu,

Jian Yang,

Miloš Hašan,

Jin Xie,

Beibei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR, author = {Zhu, Yixin and Zhu, Zuo-Liang and Yang, Jian and Ha{\v{s}}an, Milo{\v{s}} and Xie, Jin and Wang, Beibei}, title = {IntrinsicWeather: Controllable Weather Editing in Intrinsic Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30772-30781} }
Active Inference for Micro-Gesture Recognition: EFE-Guided Temporal Sampling and Adaptive Learning: Weijia Feng,

Jingyu Yang,

Ruojia Zhang,

Fengtao Sun,

Qian Gao,

Chenyang Wang,

Tongtong Su,

Jia Guo,

Xiaobai Li,

Minglai Shao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR, author = {Feng, Weijia and Yang, Jingyu and Zhang, Ruojia and Sun, Fengtao and Gao, Qian and Wang, Chenyang and Su, Tongtong and Guo, Jia and Li, Xiaobai and Shao, Minglai}, title = {Active Inference for Micro-Gesture Recognition: EFE-Guided Temporal Sampling and Adaptive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13887-13896} }
Information-Theoretic Decomposition for Multimodal Interaction Learning: Zequn Yang,

Yake Wei,

Haotian Ni,

Zhihao Xu,

Di Hu; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Zequn and Wei, Yake and Ni, Haotian and Xu, Zhihao and Hu, Di}, title = {Information-Theoretic Decomposition for Multimodal Interaction Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30278-30287} }
Learnability-Guided Diffusion for Dataset Distillation: Jeffrey A. Chan-Santiago,

Mubarak Shah; [pdf] [supp]
[bibtex]
@InProceedings{Chan-Santiago_2026_CVPR, author = {Chan-Santiago, Jeffrey A. and Shah, Mubarak}, title = {Learnability-Guided Diffusion for Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41657-41666} }
Partial Weakly-Supervised Oriented Object Detection: Mingxin Liu,

Peiyuan Zhang,

Yuan Liu,

Wei Zhang,

Yue Zhou,

Ning Liao,

Ziyang Gong,

Junwei Luo,

Zhirui Wang,

Yi Yu,

Xue Yang; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Mingxin and Zhang, Peiyuan and Liu, Yuan and Zhang, Wei and Zhou, Yue and Liao, Ning and Gong, Ziyang and Luo, Junwei and Wang, Zhirui and Yu, Yi and Yang, Xue}, title = {Partial Weakly-Supervised Oriented Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27644-27654} }
When Robots Should Say "I Don't Know": Benchmarking Abstention in Embodied Question Answering: Tao Wu,

Chuhao Zhou,

Guangyu Zhao,

Haozhi Cao,

Yewen Pu,

Jianfei Yang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Tao and Zhou, Chuhao and Zhao, Guangyu and Cao, Haozhi and Pu, Yewen and Yang, Jianfei}, title = {When Robots Should Say ``I Don't Know'': Benchmarking Abstention in Embodied Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15266-15275} }
FMPose3D: monocular 3D pose estimation via flow matching: Ti Wang,

Xiaohang Yu,

Mackenzie Weygandt Mathis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Ti and Yu, Xiaohang and Mathis, Mackenzie Weygandt}, title = {FMPose3D: monocular 3D pose estimation via flow matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14669-14679} }
VABench: A Comprehensive Benchmark for Audio-Video Generation: Daili Hua,

Xizhi Wang,

Bohan Zeng,

Xinyi Huang,

Hao Liang,

Junbo Niu,

Xinlong Chen,

Quanqing Xu,

Wentao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hua_2026_CVPR, author = {Hua, Daili and Wang, Xizhi and Zeng, Bohan and Huang, Xinyi and Liang, Hao and Niu, Junbo and Chen, Xinlong and Xu, Quanqing and Zhang, Wentao}, title = {VABench: A Comprehensive Benchmark for Audio-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23345-23355} }
Taming Sampling Perturbations with Variance Expansion Loss for Latent Diffusion Models: Qifan Li,

Xingyu Zhou,

Jinhua Zhang,

Weiyi You,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Qifan and Zhou, Xingyu and Zhang, Jinhua and You, Weiyi and Gu, Shuhang}, title = {Taming Sampling Perturbations with Variance Expansion Loss for Latent Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43643-43652} }
Seeing Beyond: Extrapolative Domain Adaptive Panoramic Segmentation: Yuanfan Zheng,

Kunyu Peng,

Xu Zheng,

Kailun Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR, author = {Zheng, Yuanfan and Peng, Kunyu and Zheng, Xu and Yang, Kailun}, title = {Seeing Beyond: Extrapolative Domain Adaptive Panoramic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42115-42125} }
PointGS: Semantic-Consistent Unsupervised 3D Point Cloud Segmentation with 3D Gaussian Splatting: Yixiao Song,

Qingyong Li,

Wen Wang,

Zhicheng Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR, author = {Song, Yixiao and Li, Qingyong and Wang, Wen and Yan, Zhicheng}, title = {PointGS: Semantic-Consistent Unsupervised 3D Point Cloud Segmentation with 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33343-33352} }
A Sanity Check for Multi-In-Domain Face Forgery Detection in the Real World: Jikang Cheng,

Renye Yan,

Zhiyuan Yan,

Yaozhong Gan,

Xueyi Zhang,

Zhongyuan Wang,

Wei Peng,

Ling Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR, author = {Cheng, Jikang and Yan, Renye and Yan, Zhiyuan and Gan, Yaozhong and Zhang, Xueyi and Wang, Zhongyuan and Peng, Wei and Liang, Ling}, title = {A Sanity Check for Multi-In-Domain Face Forgery Detection in the Real World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21306-21315} }
DGGT: Feedforward 4D Reconstruction of Dynamic Driving Scenes using Unposed Images: Xiaoxue Chen,

Ziyi Xiong,

Yuantao Chen,

Gen Li,

Nan Wang,

Hongcheng Luo,

Long Chen,

Haiyang Sun,

Bing Wang,

Guang Chen,

Hongyang Li,

Ya-Qin Zhang,

Hangjun Ye,

Hao Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Xiaoxue and Xiong, Ziyi and Chen, Yuantao and Li, Gen and Wang, Nan and Luo, Hongcheng and Chen, Long and Sun, Haiyang and Wang, Bing and Chen, Guang and Li, Hongyang and Zhang, Ya-Qin and Ye, Hangjun and Zhao, Hao}, title = {DGGT: Feedforward 4D Reconstruction of Dynamic Driving Scenes using Unposed Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1265-1276} }
ProPhy: Progressive Physical Alignment for Dynamic World Simulation: Zijun Wang,

Panwen Hu,

Jing Wang,

Terry Jingchen Zhang,

Yuhao Cheng,

Long Chen,

Yiqiang Yan,

Zutao Jiang,

Hanhui Li,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Zijun and Hu, Panwen and Wang, Jing and Zhang, Terry Jingchen and Cheng, Yuhao and Chen, Long and Yan, Yiqiang and Jiang, Zutao and Li, Hanhui and Liang, Xiaodan}, title = {ProPhy: Progressive Physical Alignment for Dynamic World Simulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14492-14501} }
FlashCap: Millisecond-Accurate Human Motion Capture via Flashing LEDs and Event-Based Vision: Zekai Wu,

Shuqi Fan,

Mengyin Liu,

Yuhua Luo,

Xincheng Lin,

Ming Yan,

Junhao Wu,

Xiuhong Lin,

Yuexin Ma,

Chenglu Wen,

Lan Xu,

Siqi Shen,

Cheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Zekai and Fan, Shuqi and Liu, Mengyin and Luo, Yuhua and Lin, Xincheng and Yan, Ming and Wu, Junhao and Lin, Xiuhong and Ma, Yuexin and Wen, Chenglu and Xu, Lan and Shen, Siqi and Wang, Cheng}, title = {FlashCap: Millisecond-Accurate Human Motion Capture via Flashing LEDs and Event-Based Vision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2221-2231} }
Venus: Benchmarking and Empowering Multimodal Large Language Models for Aesthetic Guidance and Cropping: Tianxiang Du,

Hulingxiao He,

Yuxin Peng; [pdf] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR, author = {Du, Tianxiang and He, Hulingxiao and Peng, Yuxin}, title = {Venus: Benchmarking and Empowering Multimodal Large Language Models for Aesthetic Guidance and Cropping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37766-37776} }
IEBGL: An Interpretability-Enhanced Brain Graph Learning Framework with LLM-Instructed Topology and Literature-Augmented Semantics: Yihang Duan,

Shuo Huang,

Li Zhang,

Meiling Wang,

Li Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2026_CVPR, author = {Duan, Yihang and Huang, Shuo and Zhang, Li and Wang, Meiling and Zhang, Li}, title = {IEBGL: An Interpretability-Enhanced Brain Graph Learning Framework with LLM-Instructed Topology and Literature-Augmented Semantics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35331-35340} }
Semantic Scale Space: A Framework for Controllable Image Abstraction: Kazu Mishiba; [pdf] [supp]
[bibtex]
@InProceedings{Mishiba_2026_CVPR, author = {Mishiba, Kazu}, title = {Semantic Scale Space: A Framework for Controllable Image Abstraction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17367-17376} }
E-RayZer: Self-supervised 3D Reconstruction as Spatial Visual Pre-training: Qitao Zhao,

Hao Tan,

Qianqian Wang,

Sai Bi,

Kai Zhang,

Kalyan Sunkavalli,

Shubham Tulsiani,

Hanwen Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Qitao and Tan, Hao and Wang, Qianqian and Bi, Sai and Zhang, Kai and Sunkavalli, Kalyan and Tulsiani, Shubham and Jiang, Hanwen}, title = {E-RayZer: Self-supervised 3D Reconstruction as Spatial Visual Pre-training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7525-7535} }
x^2-Fusion: Cross-Modality and Cross-Dimension Flow Estimation in Event Edge Space: Ruishan Guo,

Ciyu Ruan,

Haoyang Wang,

Zihang Gong,

Jingao Xu,

Xinlei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Ruishan and Ruan, Ciyu and Wang, Haoyang and Gong, Zihang and Xu, Jingao and Chen, Xinlei}, title = {x{\textasciicircum}2-Fusion: Cross-Modality and Cross-Dimension Flow Estimation in Event Edge Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15145-15155} }
RPGFusion: 4D Radar Prior-Guided Multi-Modal Fusion for 3D Detection: Xin Qiu,

Wenjie Liu; [pdf]
[bibtex]
@InProceedings{Qiu_2026_CVPR, author = {Qiu, Xin and Liu, Wenjie}, title = {RPGFusion: 4D Radar Prior-Guided Multi-Modal Fusion for 3D Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {284-294} }
Mind the Discriminability Trap in Source-Free Cross-domain Few-shot Learning: Zhenyu Zhang,

Yixiong Zou,

Yuhua Li,

Ruixuan Li,

Guangyao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zhenyu and Zou, Yixiong and Li, Yuhua and Li, Ruixuan and Chen, Guangyao}, title = {Mind the Discriminability Trap in Source-Free Cross-domain Few-shot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36978-36988} }
Block-based Learned Image Compression without Blocking Artifacts: Jong Wook Kim,

Suyong Bahk,

TaeHwa Lee,

HyunDong Cho,

Donghyun Kim,

Sung-Chang Lim,

Jin Soo Choi,

Hui Yong Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Jong Wook and Bahk, Suyong and Lee, TaeHwa and Cho, HyunDong and Kim, Donghyun and Lim, Sung-Chang and Choi, Jin Soo and Kim, Hui Yong}, title = {Block-based Learned Image Compression without Blocking Artifacts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19330-19338} }
FRAMER: Frequency-Aligned Self-Distillation with Adaptive Modulation Leveraging Diffusion Priors for Real-World Image Super-Resolution: Seungho Choi,

Jeahun Sung,

Jihyong Oh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR, author = {Choi, Seungho and Sung, Jeahun and Oh, Jihyong}, title = {FRAMER: Frequency-Aligned Self-Distillation with Adaptive Modulation Leveraging Diffusion Priors for Real-World Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23451-23461} }
SoccerMaster: A Vision Foundation Model for Soccer Understanding: Haolin Yang,

Jiayuan Rao,

Haoning Wu,

Weidi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Haolin and Rao, Jiayuan and Wu, Haoning and Xie, Weidi}, title = {SoccerMaster: A Vision Foundation Model for Soccer Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21549-21560} }
Rounded or Streamlined Head? Bridging Concept Bottleneck Models and Attribute-Described Object Parts: Yang Liu,

Jiajin Zhang,

Yaojun Hu,

Bingguang Hao,

Xin Cao,

Yingda Xia,

Danyang Tu,

Shi Gu,

Ling Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Yang and Zhang, Jiajin and Hu, Yaojun and Hao, Bingguang and Cao, Xin and Xia, Yingda and Tu, Danyang and Gu, Shi and Zhang, Ling}, title = {Rounded or Streamlined Head? Bridging Concept Bottleneck Models and Attribute-Described Object Parts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9880-9890} }
Bridging the 2D-3D Gap: A Hierarchical Semantic-Geometric Map for Vision Language Navigation: Kailing Li,

Tianwen Qian,

Lijin Yang,

Yuqian Fu,

Jingyu Gong,

Xiaoling Wang,

Liang He; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Kailing and Qian, Tianwen and Yang, Lijin and Fu, Yuqian and Gong, Jingyu and Wang, Xiaoling and He, Liang}, title = {Bridging the 2D-3D Gap: A Hierarchical Semantic-Geometric Map for Vision Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15243-15252} }
Progressive Supernet Training for Efficient Visual Autoregressive Modeling: Xiaoyue Chen,

Yuling Shi,

Kaiyuan Li,

Huandong Wang,

Yong Li,

Xiaodong Gu,

Xinlei Chen,

Mingbao Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Xiaoyue and Shi, Yuling and Li, Kaiyuan and Wang, Huandong and Li, Yong and Gu, Xiaodong and Chen, Xinlei and Lin, Mingbao}, title = {Progressive Supernet Training for Efficient Visual Autoregressive Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37950-37959} }
Multi-Paradigm Collaborative Adversarial Attack Against Multi-Modal Large Language Models: Yuanbo Li,

Tianyang Xu,

Cong Hu,

Tao Zhou,

Xiao-Jun Wu,

Josef Kittler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Yuanbo and Xu, Tianyang and Hu, Cong and Zhou, Tao and Wu, Xiao-Jun and Kittler, Josef}, title = {Multi-Paradigm Collaborative Adversarial Attack Against Multi-Modal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30065-30075} }
SparseWorld-TC: Trajectory-Conditioned Sparse Occupancy World Model: Jiayuan Du,

Yiming Zhao,

Zhenglong Guo,

Yong Pan,

Wenbo Hou,

Zhihui Hao,

Kun Zhan,

Qijun Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR, author = {Du, Jiayuan and Zhao, Yiming and Guo, Zhenglong and Pan, Yong and Hou, Wenbo and Hao, Zhihui and Zhan, Kun and Chen, Qijun}, title = {SparseWorld-TC: Trajectory-Conditioned Sparse Occupancy World Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7425-7434} }
Universal 3D Shape Matching via Coarse-to-Fine Language Guidance: Qinfeng Xiao,

Guofeng Mei,

Bo Yang,

Liying Zhang,

Jian Zhang,

Kit-lun Yick; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR, author = {Xiao, Qinfeng and Mei, Guofeng and Yang, Bo and Zhang, Liying and Zhang, Jian and Yick, Kit-lun}, title = {Universal 3D Shape Matching via Coarse-to-Fine Language Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13157-13167} }
Incremental Object Detection via Future-Aware Decoupled Cross-Head Distillation: Chenfeng Yin,

De Cheng,

Wenlong Luo,

Mingyue Zeng,

Shizhou Zhang,

Nannan Wang,

Xinbo Gao; [pdf] [supp]
[bibtex]
@InProceedings{Yin_2026_CVPR, author = {Yin, Chenfeng and Cheng, De and Luo, Wenlong and Zeng, Mingyue and Zhang, Shizhou and Wang, Nannan and Gao, Xinbo}, title = {Incremental Object Detection via Future-Aware Decoupled Cross-Head Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39935-39944} }
VSRELL: A Simple Baseline for Video Super-Resolution and Enhancement in Low-Light Environment: Yanming Hui,

Fanhua Shang,

Hongying Liu,

Ben Wang,

Zhenwei Zhang,

Liang Wan,

Wei Feng,

Tong Xue,

Bingqin Lv; [pdf] [supp]
[bibtex]
@InProceedings{Hui_2026_CVPR, author = {Hui, Yanming and Shang, Fanhua and Liu, Hongying and Wang, Ben and Zhang, Zhenwei and Wan, Liang and Feng, Wei and Xue, Tong and Lv, Bingqin}, title = {VSRELL: A Simple Baseline for Video Super-Resolution and Enhancement in Low-Light Environment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16345-16354} }
Learning to Drive is a Free Gift: Large-Scale Label-Free Autonomy Pretraining from Unposed In-The-Wild Videos: Matthew Strong,

Wei-Jer Chang,

Quentin Herau,

Jiezhi Yang,

Yihan Hu,

Chensheng Peng,

Wei Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Strong_2026_CVPR,
    author    = {Strong, Matthew and Chang, Wei-Jer and Herau, Quentin and Yang, Jiezhi and Hu, Yihan and Peng, Chensheng and Zhan, Wei},
    title     = {Learning to Drive is a Free Gift: Large-Scale Label-Free Autonomy Pretraining from Unposed In-The-Wild Videos},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {32144--32153},
}
MRD: Multi-resolution Retrieval-Detection Fusion for High-Resolution Image Understanding: Fan Yang,

Xingping Dong,

Xin Yu,

Wenhan Luo,

Wei Liu,

Kaihao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Fan and Dong, Xingping and Yu, Xin and Luo, Wenhan and Liu, Wei and Zhang, Kaihao},
    title     = {{MRD}: Multi-resolution Retrieval-Detection Fusion for High-Resolution Image Understanding},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {2693--2703},
}
RoMo: A Large-Scale, Richly Organized Dataset and Semantic Taxonomy for Human Motion Generation: Jiahao Zhang,

Joseph Liu,

Young-Yoon Lee,

Seonghyeon Moon,

Victor Zordan,

Guy Tevet,

C. Karen Liu,

Stephen Gould,

Oren Jacob,

Haomiao Jiang,

Mubbasir Kapadia,

Yizhak Ben-Shabat; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Jiahao and Liu, Joseph and Lee, Young-Yoon and Moon, Seonghyeon and Zordan, Victor and Tevet, Guy and Liu, C. Karen and Gould, Stephen and Jacob, Oren and Jiang, Haomiao and Kapadia, Mubbasir and Ben-Shabat, Yizhak},
    title     = {{RoMo}: A Large-Scale, Richly Organized Dataset and Semantic Taxonomy for Human Motion Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {16408--16419},
}
TransPrune: Token Transition Pruning for Efficient Large Vision-Language Model: Ao Li,

Yuxiang Duan,

Jinghui Zhang,

Congbo Ma,

Yutong Xie,

Gustavo Carneiro,

Mohammad Yaqub,

Hu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Ao and Duan, Yuxiang and Zhang, Jinghui and Ma, Congbo and Xie, Yutong and Carneiro, Gustavo and Yaqub, Mohammad and Wang, Hu},
    title     = {{TransPrune}: Token Transition Pruning for Efficient Large Vision-Language Model},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {39529--39538},
}
SEA-Flow3D: Simplified, Efficient, and Accurate Scene Flow via Spatial Vector Sampling and Multi-scale Refinement: Han Ling,

Quansen Sun,

Yinghua Yao,

Ivor Tsang,

Yinghui Sun; [pdf] [supp]
[bibtex]
@InProceedings{Ling_2026_CVPR,
    author    = {Ling, Han and Sun, Quansen and Yao, Yinghua and Tsang, Ivor and Sun, Yinghui},
    title     = {{SEA-Flow3D}: Simplified, Efficient, and Accurate Scene Flow via Spatial Vector Sampling and Multi-scale Refinement},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {36475--36484},
}
SplatSuRe: Selective Super-Resolution for Multi-view Consistent 3D Gaussian Splatting: Pranav Asthana,

Alex Hanson,

Allen Tu,

Tom Goldstein,

Matthias Zwicker,

Amitabh Varshney; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Asthana_2026_CVPR,
    author    = {Asthana, Pranav and Hanson, Alex and Tu, Allen and Goldstein, Tom and Zwicker, Matthias and Varshney, Amitabh},
    title     = {{SplatSuRe}: Selective Super-Resolution for Multi-view Consistent {3D} {Gaussian} Splatting},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {11840--11849},
}
TAMER: A Tri-Modal Contrastive Alignment and Multi-Scale Embedding Refinement Framework for Zero-Shot ECG Diagnosis: Xuewei Zhou,

Yajie Meng,

Pan Zeng,

Xianfang Tang,

Feifei Cui,

Qiangguo Jin,

Jialiang Yang,

Junlin Xu; [pdf]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Xuewei and Meng, Yajie and Zeng, Pan and Tang, Xianfang and Cui, Feifei and Jin, Qiangguo and Yang, Jialiang and Xu, Junlin},
    title     = {{TAMER}: A Tri-Modal Contrastive Alignment and Multi-Scale Embedding Refinement Framework for Zero-Shot {ECG} Diagnosis},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {10502--10511},
}
RunawayEvil: Jailbreaking the Image-to-Video Generative Models: Songping Wang,

Rufan Qian,

Yueming Lyu,

Qinglong Liu,

Linzhuang Zou,

Jie Qin,

Songhua Liu,

Caifeng Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Songping and Qian, Rufan and Lyu, Yueming and Liu, Qinglong and Zou, Linzhuang and Qin, Jie and Liu, Songhua and Shan, Caifeng},
    title     = {{RunawayEvil}: Jailbreaking the Image-to-Video Generative Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {9296--9305},
}
Towards High-Quality Image Segmentation: Improving Topology Accuracy by Penalizing Neighbor Pixels: Juan Miguel Valverde,

Dim P. Papadopoulos,

Rasmus Larsen,

Anders Bjorholm Dahl; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Valverde_2026_CVPR,
    author    = {Valverde, Juan Miguel and Papadopoulos, Dim P. and Larsen, Rasmus and Dahl, Anders Bjorholm},
    title     = {Towards High-Quality Image Segmentation: Improving Topology Accuracy by Penalizing Neighbor Pixels},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {13123--13133},
}
Stronger Normalization-Free Transformers: Mingzhi Chen,

Taiming Lu,

Jiachen Zhu,

Mingjie Sun,

Zhuang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Mingzhi and Lu, Taiming and Zhu, Jiachen and Sun, Mingjie and Liu, Zhuang},
    title     = {Stronger Normalization-Free Transformers},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {27418--27428},
}
InstantRetouch: Efficient and High-Fidelity Instruction-Guided Image Retouching with Bilateral Space: Jiarui Wu,

Yujin Wang,

Ruikang Li,

Fan Zhang,

Mingde Yao,

Tianfan Xue; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Jiarui and Wang, Yujin and Li, Ruikang and Zhang, Fan and Yao, Mingde and Xue, Tianfan},
    title     = {{InstantRetouch}: Efficient and High-Fidelity Instruction-Guided Image Retouching with Bilateral Space},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {8216--8226},
}
Residual Decoder Adapter: ID-Preserving Tokenizer Adaption for Autoregressive Text Rendering: Dongxing Mao,

Alex Jinpeng Wang,

Jiahao Tang,

Kevin Qinghong Lin,

Linjie Li,

Zhengyuan Yang,

Lijuan Wang,

Min Li,

Jingru Tan; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2026_CVPR,
    author    = {Mao, Dongxing and Wang, Alex Jinpeng and Tang, Jiahao and Lin, Kevin Qinghong and Li, Linjie and Yang, Zhengyuan and Wang, Lijuan and Li, Min and Tan, Jingru},
    title     = {Residual Decoder Adapter: {ID}-Preserving Tokenizer Adaption for Autoregressive Text Rendering},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {22017--22027},
}
When AVSR Meets Video Conferencing: Dataset, Degradation, and the Hidden Mechanism Behind Performance Collapse: Yihuan Huang,

Jun Xue,

Liu Jiajun,

Daixian Li,

Tong Zhang,

Zhuolin Yi,

Yanzhen Ren,

Kai Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
    author    = {Huang, Yihuan and Xue, Jun and Jiajun, Liu and Li, Daixian and Zhang, Tong and Yi, Zhuolin and Ren, Yanzhen and Li, Kai},
    title     = {When {AVSR} Meets Video Conferencing: Dataset, Degradation, and the Hidden Mechanism Behind Performance Collapse},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {4448--4457},
}
A More Word-like Image Tokenization for MLLMs: Hyun Lee,

Hyemin Jeong,

Yejin Kim,

Hyungwook Choi,

Hyunsoo Cho,

Soo Kyung Kim,

Joonseok Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, Hyun and Jeong, Hyemin and Kim, Yejin and Choi, Hyungwook and Cho, Hyunsoo and Kim, Soo Kyung and Lee, Joonseok},
    title     = {A More Word-like Image Tokenization for {MLLMs}},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {17641--17650},
}
Turning Pre-Trained Vision Transformers into End-to-End Histopathology Whole Slide Image Models for Survival Prediction: Jiawen Li,

Jiali Hu,

Xitong Ling,

Renao Yan,

Yuxuan Chen,

Tian Guan,

Yonghong He; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Jiawen and Hu, Jiali and Ling, Xitong and Yan, Renao and Chen, Yuxuan and Guan, Tian and He, Yonghong},
    title     = {Turning Pre-Trained Vision Transformers into End-to-End Histopathology Whole Slide Image Models for Survival Prediction},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {21046--21056},
}
Bias Is a Subspace, Not a Coordinate: A Geometric Rethinking of Post-hoc Debiasing in Vision-Language Models: Dachuan Zhao,

Weiyue Li,

Zhenda Shen,

Yushu Qiu,

Bowen Xu,

Haoyu Chen,

Yongchao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
    author    = {Zhao, Dachuan and Li, Weiyue and Shen, Zhenda and Qiu, Yushu and Xu, Bowen and Chen, Haoyu and Chen, Yongchao},
    title     = {Bias Is a Subspace, Not a Coordinate: A Geometric Rethinking of Post-hoc Debiasing in Vision-Language Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {10230--10240},
}
SAMTok: Representing Any Mask with Two Words: Yikang Zhou,

Tao Zhang,

Dengxian Gong,

Yuanzheng Wu,

Ye Tian,

Haochen Wang,

Haobo Yuan,

Jiacong Wang,

Lu Qi,

Hao Fei,

Shunping Ji,

Anran Wang,

Zhuochen Wang,

Yujing Wang,

Cheng Chen,

Xiangtai Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Yikang and Zhang, Tao and Gong, Dengxian and Wu, Yuanzheng and Tian, Ye and Wang, Haochen and Yuan, Haobo and Wang, Jiacong and Qi, Lu and Fei, Hao and Ji, Shunping and Wang, Anran and Wang, Zhuochen and Wang, Yujing and Chen, Cheng and Li, Xiangtai},
    title     = {{SAMTok}: Representing Any Mask with Two Words},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {37852--37863},
}
Iris: Integrating Language into Diffusion-based Monocular Depth Estimation: Ziyao Zeng,

Jingcheng Ni,

Daniel Wang,

Patrick Rim,

Younjoon Chung,

Fengyu Yang,

Byung-Woo Hong,

Alex Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
    author    = {Zeng, Ziyao and Ni, Jingcheng and Wang, Daniel and Rim, Patrick and Chung, Younjoon and Yang, Fengyu and Hong, Byung-Woo and Wong, Alex},
    title     = {Iris: Integrating Language into Diffusion-based Monocular Depth Estimation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {34193--34205},
}
MetroGS: Efficient and Stable Reconstruction of Geometrically Accurate High-Fidelity Large-Scale Scenes: Kehua Chen,

Tianlu Mao,

Xinzhu Ma,

Hao Jiang,

Zehao Li,

Zihan Liu,

Shuqin Gao,

Honglong Zhao,

Feng Dai,

Yucheng Zhang,

Zhaoqi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Kehua and Mao, Tianlu and Ma, Xinzhu and Jiang, Hao and Li, Zehao and Liu, Zihan and Gao, Shuqin and Zhao, Honglong and Dai, Feng and Zhang, Yucheng and Wang, Zhaoqi},
    title     = {{MetroGS}: Efficient and Stable Reconstruction of Geometrically Accurate High-Fidelity Large-Scale Scenes},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {395--405},
}
CARLoS: Retrieval via Concise Assessment Representation of LoRAs at Scale: Shahar Sarfaty,

Adi Haviv,

Uri Hacohen,

Niva Elkin-Koren,

Roi Livni,

Amit H. Bermano; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sarfaty_2026_CVPR,
    author    = {Sarfaty, Shahar and Haviv, Adi and Hacohen, Uri and Elkin-Koren, Niva and Livni, Roi and Bermano, Amit H.},
    title     = {{CARLoS}: Retrieval via Concise Assessment Representation of {LoRAs} at Scale},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {23922--23932},
}
DecoVLN: Decoupling Observation, Reasoning, and Correction for Vision-and-Language Navigation: Zihao Xin,

Wentong Li,

Yixuan Jiang,

Bin Wang,

Runmin Cong,

Jie Qin,

Shengjun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xin_2026_CVPR,
    author    = {Xin, Zihao and Li, Wentong and Jiang, Yixuan and Wang, Bin and Cong, Runmin and Qin, Jie and Huang, Shengjun},
    title     = {{DecoVLN}: Decoupling Observation, Reasoning, and Correction for Vision-and-Language Navigation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {32410--32420},
}
PET-DINO: Unifying Visual Cues into Grounding DINO with Prompt-Enriched Training: Weifu Fu,

Jinyang Li,

Bin-Bin Gao,

Jialin Li,

Yuhuan Lin,

Hanqiu Deng,

Wenbing Tao,

Yong Liu,

Chengjie Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2026_CVPR,
    author    = {Fu, Weifu and Li, Jinyang and Gao, Bin-Bin and Li, Jialin and Lin, Yuhuan and Deng, Hanqiu and Tao, Wenbing and Liu, Yong and Wang, Chengjie},
    title     = {{PET-DINO}: Unifying Visual Cues into {Grounding DINO} with Prompt-Enriched Training},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {13039--13048},
}
Dual-Agent Reinforcement Learning for Adaptive and Cost-Aware Visual-Inertial Odometry: Feiyang Pan,

Shenghe Zheng,

Chunyan Yin,

Guangbin Dou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
    author    = {Pan, Feiyang and Zheng, Shenghe and Yin, Chunyan and Dou, Guangbin},
    title     = {Dual-Agent Reinforcement Learning for Adaptive and Cost-Aware Visual-Inertial Odometry},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {24885--24894},
}
UniRefiner: Teaching Pre-trained ViTs to Self-Dispose Dross via Contrastive Register: Congpei Qiu,

Zhaoyu Hu,

Wei Ke,

Zhuotao Tian,

Yanhao Wu,

Tong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
    author    = {Qiu, Congpei and Hu, Zhaoyu and Ke, Wei and Tian, Zhuotao and Wu, Yanhao and Zhang, Tong},
    title     = {{UniRefiner}: Teaching Pre-trained {ViTs} to Self-Dispose Dross via Contrastive Register},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {10061--10070},
}
SafeLogo: Turning Your Logos into Jailbreak Shields via Micro-Regional Adversarial Training: Zhiyi Duan,

Xiaoyue Zhang,

Tianxing Man; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2026_CVPR,
    author    = {Duan, Zhiyi and Zhang, Xiaoyue and Man, Tianxing},
    title     = {{SafeLogo}: Turning Your Logos into Jailbreak Shields via Micro-Regional Adversarial Training},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {37611--37620},
}
FastHybrid: Accelerating Hybrid Autoregressive Image Generation with Lookahead and Guided Decoding: Zhengguo Jiang,

Fang Zhang,

Yongxiang Hua,

Bocheng Li,

Wentao Zhang,

Linli Xu; [pdf]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
    author    = {Jiang, Zhengguo and Zhang, Fang and Hua, Yongxiang and Li, Bocheng and Zhang, Wentao and Xu, Linli},
    title     = {{FastHybrid}: Accelerating Hybrid Autoregressive Image Generation with Lookahead and Guided Decoding},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {23204--23214},
}
Towards Balanced Multi-Modal Learning in 3D Human Pose Estimation: Mengshi Qi,

Jiaxuan Peng,

Xianlin Zhang,

Huadong Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Qi_2026_CVPR,
    author    = {Qi, Mengshi and Peng, Jiaxuan and Zhang, Xianlin and Ma, Huadong},
    title     = {Towards Balanced Multi-Modal Learning in {3D} Human Pose Estimation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {21231--21241},
}
Attention Surgery: An Efficient Recipe to Linearize Your Video Diffusion Transformer: Mohsen Ghafoorian,

Denis Korzhenkov,

Amirhossein Habibian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghafoorian_2026_CVPR,
    author    = {Ghafoorian, Mohsen and Korzhenkov, Denis and Habibian, Amirhossein},
    title     = {Attention Surgery: An Efficient Recipe to Linearize Your Video Diffusion Transformer},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {32915--32925},
}
Elastic Weight Consolidation Done Right for Continual Learning: Xuan Liu,

Xiaobin Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Xuan and Chang, Xiaobin},
    title     = {Elastic Weight Consolidation Done Right for Continual Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {3930--3940},
}
ConsID-Gen: View-Consistent and Identity-Preserving Image-to-Video Generation: Mingyang Wu,

Ashirbad Mishra,

Soumik Dey,

Shuo Xing,

Naveen Ravipati,

Hansi Wu,

Binbin Li,

Zhengzhong Tu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Mingyang and Mishra, Ashirbad and Dey, Soumik and Xing, Shuo and Ravipati, Naveen and Wu, Hansi and Li, Binbin and Tu, Zhengzhong},
    title     = {{ConsID-Gen}: View-Consistent and Identity-Preserving Image-to-Video Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {1853--1863},
}
VIRO: Robust and Efficient Neuro-Symbolic Reasoning with Verification for Referring Expression Comprehension: Hyejin Park,

Junhyuk Kwon,

Suha Kwak,

Jungseul Ok; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
    author    = {Park, Hyejin and Kwon, Junhyuk and Kwak, Suha and Ok, Jungseul},
    title     = {{VIRO}: Robust and Efficient Neuro-Symbolic Reasoning with Verification for Referring Expression Comprehension},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {33426--33435},
}
Best Segmentation Buddies for Image-Shape Correspondence: Itai Lang,

Dongwei Lyu,

Dale Decatur,

Rana Hanocka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lang_2026_CVPR,
    author    = {Lang, Itai and Lyu, Dongwei and Decatur, Dale and Hanocka, Rana},
    title     = {Best Segmentation Buddies for Image-Shape Correspondence},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {20499--20510},
}
SketchDeco: Training-Free Latent Composition for Precise Sketch Colourisation: Chaitat Utintu,

Yi-Zhe Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Utintu_2026_CVPR,
    author    = {Utintu, Chaitat and Song, Yi-Zhe},
    title     = {{SketchDeco}: Training-Free Latent Composition for Precise Sketch Colourisation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {484--494},
}
SynthRGB-T: Language-Vision Guided Image Translation for Diversity Synthesis: Jiangang Ding,

Yiquan Du,

Pengxiang Li,

Lili Pei,

Yuanlin Zhao,

Wei Li; [pdf]
[bibtex]
@InProceedings{Ding_2026_CVPR,
    author    = {Ding, Jiangang and Du, Yiquan and Li, Pengxiang and Pei, Lili and Zhao, Yuanlin and Li, Wei},
    title     = {{SynthRGB-T}: Language-Vision Guided Image Translation for Diversity Synthesis},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {17259--17269},
}
Few-Shot Incremental 3D Object Detection in Dynamic Indoor Environments: Yun Zhu,

Jianjun Qian,

Jian Yang,

Jin Xie,

Na Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Yun and Qian, Jianjun and Yang, Jian and Xie, Jin and Zhao, Na},
    title     = {Few-Shot Incremental {3D} Object Detection in Dynamic Indoor Environments},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {18786--18795},
}
Better, Stronger, Faster: Tackling the Trilemma in MLLM-based Segmentation with Simultaneous Textual Mask Prediction: Jiazhen Liu,

Mingkuan Feng,

Long Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Jiazhen and Feng, Mingkuan and Chen, Long},
    title     = {Better, Stronger, Faster: Tackling the Trilemma in {MLLM}-based Segmentation with Simultaneous Textual Mask Prediction},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {33121--33130},
}
DDSF: Robust Few-Shot Learning via Disentangled Subspaces with Determinantal Point Process: Xulun Ye,

Yifan Mei,

Kun Zhou,

Zelei Wu,

Jieyu Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR,
    author    = {Ye, Xulun and Mei, Yifan and Zhou, Kun and Wu, Zelei and Zhao, Jieyu},
    title     = {{DDSF}: Robust Few-Shot Learning via Disentangled Subspaces with Determinantal Point Process},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {19591--19601},
}
VIRAL: Visual Sim-to-Real at Scale for Humanoid Loco-Manipulation: Tairan He,

Zi Wang,

Haoru Xue,

Qingwei Ben,

Zhengyi Luo,

Wenli Xiao,

Ye Yuan,

Xingye Da,

Fernando Castañeda,

Shankar Sastry,

Changliu Liu,

Guanya Shi,

Linxi Fan,

Yuke Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
    author    = {He, Tairan and Wang, Zi and Xue, Haoru and Ben, Qingwei and Luo, Zhengyi and Xiao, Wenli and Yuan, Ye and Da, Xingye and Casta{\~n}eda, Fernando and Sastry, Shankar and Liu, Changliu and Shi, Guanya and Fan, Linxi and Zhu, Yuke},
    title     = {{VIRAL}: Visual Sim-to-Real at Scale for Humanoid Loco-Manipulation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {13430--13441},
}
Mirror Illusion Art: Xiaopei Zhu,

Zeyuan Li,

Jun Zhu,

Xiaolin Hu; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Xiaopei and Li, Zeyuan and Zhu, Jun and Hu, Xiaolin},
    title     = {Mirror Illusion Art},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {31577--31585},
}
DiffusionHarmonizer: Bridging Neural Reconstruction and Photorealistic Simulation with Online Diffusion Enhancer: Yuxuan Zhang,

Katarína Tóthová,

Zian Wang,

Kangxue Yin,

Haithem Turki,

Riccardo de Lutio,

Yen-Yu Chang,

Or Litany,

Sanja Fidler,

Zan Gojcic; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Yuxuan and T{\'o}thov{\'a}, Katar{\'\i}na and Wang, Zian and Yin, Kangxue and Turki, Haithem and de Lutio, Riccardo and Chang, Yen-Yu and Litany, Or and Fidler, Sanja and Gojcic, Zan},
    title     = {{DiffusionHarmonizer}: Bridging Neural Reconstruction and Photorealistic Simulation with Online Diffusion Enhancer},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {43494--43504},
}
Parallel Rigidity Matters for Bundle Adjustment: Lalit Manam,

Venu Madhav Govindu; [pdf] [supp]
[bibtex]
@InProceedings{Manam_2026_CVPR,
    author    = {Manam, Lalit and Govindu, Venu Madhav},
    title     = {Parallel Rigidity Matters for Bundle Adjustment},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {29035--29046},
}
BiPreManip: Learning Affordance-Based Bimanual Preparatory Manipulation through Anticipatory Collaboration: Yan Shen,

Feng Jiang,

Zichen He,

Xiaoqi Li,

Yuchen Liu,

Zhiyu Li,

Ruihai Wu,

Hao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
    author    = {Shen, Yan and Jiang, Feng and He, Zichen and Li, Xiaoqi and Liu, Yuchen and Li, Zhiyu and Wu, Ruihai and Dong, Hao},
    title     = {{BiPreManip}: Learning Affordance-Based Bimanual Preparatory Manipulation through Anticipatory Collaboration},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {42430--42440},
}
CATNet: Collaborative Alignment and Transformation Network for Cooperative Perception: Gong Chen,

Chaokun Zhang,

Tao Tang,

Pengcheng Lv,

Feng Li,

Xin Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Gong and Zhang, Chaokun and Tang, Tao and Lv, Pengcheng and Li, Feng and Xie, Xin},
    title     = {{CATNet}: Collaborative Alignment and Transformation Network for Cooperative Perception},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {18724--18733},
}
PP-OCRv5: A Specialized 5M-Parameter Model Rivaling Billion-Parameter Vision-Language Models on OCR Tasks: Cheng Cui,

Yubo Zhang,

Ting Sun,

Xueqing Wang,

Hongen Liu,

Manhui Lin,

Yue Zhang,

Tingquan Gao,

Changda Zhou,

Jiaxuan Liu,

Zelun Zhang,

Jing Zhang,

Jun Zhang,

Yi Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2026_CVPR,
    author    = {Cui, Cheng and Zhang, Yubo and Sun, Ting and Wang, Xueqing and Liu, Hongen and Lin, Manhui and Zhang, Yue and Gao, Tingquan and Zhou, Changda and Liu, Jiaxuan and Zhang, Zelun and Zhang, Jing and Zhang, Jun and Liu, Yi},
    title     = {{PP-OCRv5}: A Specialized {5M}-Parameter Model Rivaling Billion-Parameter Vision-Language Models on {OCR} Tasks},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {2467--2476},
}
RegionRoute: Regional Style Transfer with Diffusion Model: Bowen Chen,

Jake Zuena,

Alan C. Bovik,

Divya Kothandaraman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Bowen and Zuena, Jake and Bovik, Alan C. and Kothandaraman, Divya},
    title     = {{RegionRoute}: Regional Style Transfer with Diffusion Model},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {35736--35746},
}
SO(3)-Equivariant ViT-Adapter for Data-Efficient Zero-Shot Sim-to-Real Indoor Panoramic Depth Estimation: Ziyan He,

Qiudan Zhang,

Lin Ma,

Xu Wang; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR,
    author    = {He, Ziyan and Zhang, Qiudan and Ma, Lin and Wang, Xu},
    title     = {{SO(3)}-Equivariant {ViT-Adapter} for Data-Efficient Zero-Shot Sim-to-Real Indoor Panoramic Depth Estimation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {5740--5750},
}
Expanding Spatial and Temporal Context for Robotic Imitation Learning With Scene Graphs: Jianing Qian,

Qinhe Peng,

Emmanuel Panov,

Leonor Fermoselle,

Dinesh Jayaraman,

Bernadette Bucher,

Tarik Kelestemur; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2026_CVPR,
    author    = {Qian, Jianing and Peng, Qinhe and Panov, Emmanuel and Fermoselle, Leonor and Jayaraman, Dinesh and Bucher, Bernadette and Kelestemur, Tarik},
    title     = {Expanding Spatial and Temporal Context for Robotic Imitation Learning With Scene Graphs},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {28010--28020},
}
Towards Training-free Scene Text Editing: Yubo Li,

Xugong Qin,

Peng Zhang,

Hailun Lin,

Gangyan Zeng,

Kexin Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Yubo and Qin, Xugong and Zhang, Peng and Lin, Hailun and Zeng, Gangyan and Zhang, Kexin},
    title     = {Towards Training-free Scene Text Editing},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {15291--15301},
}
ACoT-VLA: Action Chain-of-Thought for Vision-Language-Action Models: Linqing Zhong,

Yi Liu,

Yifei Wei,

Ziyu Xiong,

Si Liu,

Guanghui Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
    author    = {Zhong, Linqing and Liu, Yi and Wei, Yifei and Xiong, Ziyu and Liu, Si and Ren, Guanghui},
    title     = {{ACoT-VLA}: Action Chain-of-Thought for Vision-Language-Action Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {8152--8162},
}
Fast SceneScript: Fast and Accurate Language-Based 3D Scene Understanding via Multi-Token Prediction: Ruihong Yin,

Xuepeng Shi,

Oleksandr Bailo,

Marco Manfredi,

Theo Gevers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
    author    = {Yin, Ruihong and Shi, Xuepeng and Bailo, Oleksandr and Manfredi, Marco and Gevers, Theo},
    title     = {Fast {SceneScript}: Fast and Accurate Language-Based {3D} Scene Understanding via Multi-Token Prediction},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {2457--2466},
}
MSPT: Efficient Large-Scale Physical Modeling via Parallelized Multi-Scale Attention: Pedro M. P. Curvo,

Jan-Willem van de Meent,

Maksim Zhdanov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Curvo_2026_CVPR,
    author    = {Curvo, Pedro M. P. and van de Meent, Jan-Willem and Zhdanov, Maksim},
    title     = {{MSPT}: Efficient Large-Scale Physical Modeling via Parallelized Multi-Scale Attention},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {12924--12933},
}
Merge3D: Efficient 3D Multimodal LLMs via Joint 2D-3D Token Merging: Tianbo Pan,

Xingyi Yang,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2026_CVPR,
    author    = {Pan, Tianbo and Yang, Xingyi and Wang, Xinchao},
    title     = {{Merge3D}: Efficient {3D} Multimodal {LLMs} via Joint {2D-3D} Token Merging},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {31066--31077},
}
Spatial-Frequency Collaborative Learning for Occluded Visible-Infrared Person Re-Identification: Jian Yu,

Yujian Feng,

Shuai You,

Zhongkai Zhou,

Fei Wu,

Zhengjun Jing,

Yimu Ji; [pdf]
[bibtex]
@InProceedings{Yu_2026_CVPR,
    author    = {Yu, Jian and Feng, Yujian and You, Shuai and Zhou, Zhongkai and Wu, Fei and Jing, Zhengjun and Ji, Yimu},
    title     = {Spatial-Frequency Collaborative Learning for Occluded Visible-Infrared Person Re-Identification},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2026},
    pages     = {4343--4352},
}
IMAIA: Interactive Maps AI Assistant for Travel Planning and Geo-Spatial Intelligence: Jieren Deng,

Zhizhang Hu,

Ziyan He,

Aleksandar Cvetkovic,

Pak Kiu Chung,

Dragomir Yankov,

Chiqun Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Jieren and Hu, Zhizhang and He, Ziyan and Cvetkovic, Aleksandar and Chung, Pak Kiu and Yankov, Dragomir and Zhang, Chiqun},
  title     = {{IMAIA}: Interactive Maps {AI} Assistant for Travel Planning and Geo-Spatial Intelligence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40064--40073},
}
SARMAE: Masked Autoencoder for SAR Representation Learning: Danxu Liu,

Di Wang,

Hebaixu Wang,

Haoyang Chen,

Wentao Jiang,

Yilin Cheng,

Haonan Guo,

Wei Cui,

Jing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Danxu and Wang, Di and Wang, Hebaixu and Chen, Haoyang and Jiang, Wentao and Cheng, Yilin and Guo, Haonan and Cui, Wei and Zhang, Jing},
  title     = {{SARMAE}: Masked Autoencoder for {SAR} Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6496--6507},
}
ActiveGrasp: Information-Guided Active Grasping with Calibrated Energy-based Model: Boshu Lei,

Wen Jiang,

Kostas Daniilidis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2026_CVPR,
  author    = {Lei, Boshu and Jiang, Wen and Daniilidis, Kostas},
  title     = {{ActiveGrasp}: Information-Guided Active Grasping with Calibrated Energy-based Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42418--42429},
}
Prototype-as-Prompt: Multimodal Sentiment Prototypes Endowing Large Language Models the Capability to Perform Multimodal Sentiment Analysis: Xianbing Zhao,

Lan Luo,

Hengyang Lu,

Buzhou Tang; [pdf]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Xianbing and Luo, Lan and Lu, Hengyang and Tang, Buzhou},
  title     = {Prototype-as-Prompt: Multimodal Sentiment Prototypes Endowing Large Language Models the Capability to Perform Multimodal Sentiment Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23010--23020},
}
TROPHIES: Temporal Reconstruction of Places, Humans, and Cameras from Multi-view Videos: Jinpeng Liu,

Yukang Xu,

Yutong Li,

Xingyu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jinpeng and Xu, Yukang and Li, Yutong and Liu, Xingyu},
  title     = {{TROPHIES}: Temporal Reconstruction of Places, Humans, and Cameras from Multi-view Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21154--21164},
}
SVHalluc: Benchmarking Speech-Vision Hallucination in Audio-Visual Large Language Models: Chenshuang Zhang,

Kyeong Seon Kim,

Chengxin Liu,

Tae-Hyun Oh; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chenshuang and Kim, Kyeong Seon and Liu, Chengxin and Oh, Tae-Hyun},
  title     = {{SVHalluc}: Benchmarking Speech-Vision Hallucination in Audio-Visual Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25304--25314},
}
Bridging Privacy and Provenance: Traceable Virtual Identity Generation: Xianhan Zeng,

Xiaoxiao Hu,

Sheng Li,

Zhenxing Qian,

Xinpeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Xianhan and Hu, Xiaoxiao and Li, Sheng and Qian, Zhenxing and Zhang, Xinpeng},
  title     = {Bridging Privacy and Provenance: Traceable Virtual Identity Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32367--32376},
}
Modeling the Visual Ambiguity of Human Sketches: Yang Zhou,

Ping Ni,

Jin Wang,

Senyun Jia,

Jingdan Yan,

Kaixiang Huang,

Guodong Lu,

Jingru Yang,

Shengfeng He; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Yang and Ni, Ping and Wang, Jin and Jia, Senyun and Yan, Jingdan and Huang, Kaixiang and Lu, Guodong and Yang, Jingru and He, Shengfeng},
  title     = {Modeling the Visual Ambiguity of Human Sketches},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16876--16886},
}
CrossEarth-Gate: Fisher-Guided Adaptive Tuning Engine for Efficient Adaptation of Cross-Domain Remote Sensing Semantic Segmentation: Shilei Cao,

Ziyang Gong,

Hehai Lin,

Yang Liu,

Jiashun Cheng,

Xiaoxing Hu,

Haoyuan Liang,

Guowen Li,

Chengwei Qin,

Hong Cheng,

Xue Yang,

Juepeng Zheng,

Haohuan Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Shilei and Gong, Ziyang and Lin, Hehai and Liu, Yang and Cheng, Jiashun and Hu, Xiaoxing and Liang, Haoyuan and Li, Guowen and Qin, Chengwei and Cheng, Hong and Yang, Xue and Zheng, Juepeng and Fu, Haohuan},
  title     = {{CrossEarth-Gate}: {Fisher}-Guided Adaptive Tuning Engine for Efficient Adaptation of Cross-Domain Remote Sensing Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13222--13233},
}
VMonarch: Efficient Video Diffusion Transformers with Structured Attention: Cheng Liang,

Haoxian Chen,

Liang Hou,

Qi Fan,

Gangshan Wu,

Xin Tao,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Cheng and Chen, Haoxian and Hou, Liang and Fan, Qi and Wu, Gangshan and Tao, Xin and Wang, Limin},
  title     = {{VMonarch}: Efficient Video Diffusion Transformers with Structured Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4613--4623},
}
Convexity-Aware Noise Calibration: A Self-Supervised Framework for Noise-Level-Unknown Image Denoising: Zhan Wang,

Leiquan Wang,

Chunlei Wu,

Yu Meng; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhan and Wang, Leiquan and Wu, Chunlei and Meng, Yu},
  title     = {Convexity-Aware Noise Calibration: A Self-Supervised Framework for Noise-Level-Unknown Image Denoising},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29929--29938},
}
SGAD-SLAM: Splatting Gaussians at Adjusted Depth for Better Radiance Fields in RGBD SLAM: Pengchong Hu,

Zhizhong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Pengchong and Han, Zhizhong},
  title     = {{SGAD-SLAM}: Splatting {Gaussians} at Adjusted Depth for Better Radiance Fields in {RGBD} {SLAM}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18934--18945},
}
OddGridBench: Exposing the Lack of Fine-Grained Visual Discrepancy Sensitivity in Multimodal Large Language Models: Tengjin Weng,

Wenhao Jiang,

Jingyi Wang,

Ming Li,

Lin Ma,

Zhong Ming; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Weng_2026_CVPR,
  author    = {Weng, Tengjin and Jiang, Wenhao and Wang, Jingyi and Li, Ming and Ma, Lin and Ming, Zhong},
  title     = {{OddGridBench}: Exposing the Lack of Fine-Grained Visual Discrepancy Sensitivity in Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1575--1584},
}
YieldSAT: A Multimodal Benchmark Dataset for High-Resolution Crop Yield Prediction: Miro Miranda,

Deepak Pathak,

Patrick Helber,

Benjamin Bischke,

Hiba Najjar,

Francisco Mena,

Cristhian Sanchez,

Akshay Pai,

Diego Arenas,

Matias Valdenegro-Toro,

Marcela Charfuelan,

Marlon Nuske,

Andreas Dengel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Miranda_2026_CVPR,
  author    = {Miranda, Miro and Pathak, Deepak and Helber, Patrick and Bischke, Benjamin and Najjar, Hiba and Mena, Francisco and Sanchez, Cristhian and Pai, Akshay and Arenas, Diego and Valdenegro-Toro, Matias and Charfuelan, Marcela and Nuske, Marlon and Dengel, Andreas},
  title     = {{YieldSAT}: A Multimodal Benchmark Dataset for High-Resolution Crop Yield Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22920--22930},
}
SAIL: Similarity-Aware Guidance and Inter-Caption Augmentation-based Learning for Weakly-Supervised Dense Video Captioning: Ye-Chan Kim,

SeungJu Cha,

Si-Woo Kim,

Minju Jeon,

Hyungee Kim,

Dong-Jin Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Ye-Chan and Cha, SeungJu and Kim, Si-Woo and Jeon, Minju and Kim, Hyungee and Kim, Dong-Jin},
  title     = {{SAIL}: Similarity-Aware Guidance and Inter-Caption Augmentation-based Learning for Weakly-Supervised Dense Video Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3466--3475},
}
ReSAM: Refine, Requery, and Reinforce: Self-Prompting Point-Supervised Segmentation for Remote Sensing Images: Muhammad Naseer Subhani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Subhani_2026_CVPR,
  author    = {Subhani, Muhammad Naseer},
  title     = {{ReSAM}: Refine, Requery, and Reinforce: Self-Prompting Point-Supervised Segmentation for Remote Sensing Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3805--3814},
}
LEMON: A Large Endoscopic MONocular Dataset and Foundation Model for Perception in Surgical Settings: Chengan Che,

Chao Wang,

Tom Vercauteren,

Sophia Tsoka,

Luis C. Garcia-Peraza-Herrera; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Che_2026_CVPR,
  author    = {Che, Chengan and Wang, Chao and Vercauteren, Tom and Tsoka, Sophia and Garcia-Peraza-Herrera, Luis C.},
  title     = {{LEMON}: A Large Endoscopic {MONocular} Dataset and Foundation Model for Perception in Surgical Settings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42659--42669},
}
FairLLaVA: Fairness-Aware Parameter-Efficient Fine-Tuning for Large Vision-Language Assistants: Mahesh Bhosale,

Abdul Wasi,

Shantam Srivastava,

Shifa Latif,

Tianyu Luan,

Mingchen Gao,

David Doermann,

Xuan Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bhosale_2026_CVPR,
  author    = {Bhosale, Mahesh and Wasi, Abdul and Srivastava, Shantam and Latif, Shifa and Luan, Tianyu and Gao, Mingchen and Doermann, David and Gong, Xuan},
  title     = {{FairLLaVA}: Fairness-Aware Parameter-Efficient Fine-Tuning for Large Vision-Language Assistants},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42615--42625},
}
Illuminating Visual Identity in Universal Multimodal Embeddings: Jiawei Cao,

Junyi Feng,

Jiashen Hua,

Ziheng Huang,

Bing Deng,

Kaijie Wu,

Chaochen Gu,

Jieping Ye; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Jiawei and Feng, Junyi and Hua, Jiashen and Huang, Ziheng and Deng, Bing and Wu, Kaijie and Gu, Chaochen and Ye, Jieping},
  title     = {Illuminating Visual Identity in Universal Multimodal Embeddings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8737--8748},
}
Large-scale Robust Enhanced Ensemble Clustering via Outlier Decoupling: Jiaxuan Xu,

Lei Duan,

Xinye Wang,

Liang Du; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jiaxuan and Duan, Lei and Wang, Xinye and Du, Liang},
  title     = {Large-scale Robust Enhanced Ensemble Clustering via Outlier Decoupling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39691--39700},
}
FrankenMotion: Part-level Human Motion Generation and Composition: Chuqiao Li,

Xianghui Xie,

Yong Cao,

Andreas Geiger,

Gerard Pons-Moll; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Chuqiao and Xie, Xianghui and Cao, Yong and Geiger, Andreas and Pons-Moll, Gerard},
  title     = {{FrankenMotion}: Part-level Human Motion Generation and Composition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16420--16431},
}
Disentanglement-wise Image Dehazing through Cross-Domain Manifold Consensus: Tianyi Lyu,

Mingye Ju,

Kai-Kuang Ma; [pdf] [supp]
[bibtex]
@InProceedings{Lyu_2026_CVPR,
  author    = {Lyu, Tianyi and Ju, Mingye and Ma, Kai-Kuang},
  title     = {Disentanglement-wise Image Dehazing through Cross-Domain Manifold Consensus},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22733--22743},
}
Event-Based Motion Deblurring Using Task-Oriented 3D Gaussian Event Representations: Shengdong Xue,

Haoxiang Ma,

Hao Chen,

Zhen Yang,

Yongjian Deng; [pdf] [supp]
[bibtex]
@InProceedings{Xue_2026_CVPR,
  author    = {Xue, Shengdong and Ma, Haoxiang and Chen, Hao and Yang, Zhen and Deng, Yongjian},
  title     = {Event-Based Motion Deblurring Using Task-Oriented {3D} {Gaussian} Event Representations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29547--29556},
}
SafeGRPO: Self-Rewarded Multimodal Safety Alignment via Rule-Governed Policy Optimization: Xuankun Rong,

Wenke Huang,

Tingfeng Wang,

Daiguo Zhou,

Bo Du,

Mang Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rong_2026_CVPR,
  author    = {Rong, Xuankun and Huang, Wenke and Wang, Tingfeng and Zhou, Daiguo and Du, Bo and Ye, Mang},
  title     = {{SafeGRPO}: Self-Rewarded Multimodal Safety Alignment via Rule-Governed Policy Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7901--7911},
}
Uncertainty-driven 3D Gaussian Splatting Active Mapping via Anisotropic Visibility Field: Shangjie Xue,

Jesse Dill,

Dhruv Ahuja,

Frank Dellaert,

Panagiotis Tsiotras,

Danfei Xu; [pdf] [supp]
[bibtex]
@InProceedings{Xue_2026_CVPR,
  author    = {Xue, Shangjie and Dill, Jesse and Ahuja, Dhruv and Dellaert, Frank and Tsiotras, Panagiotis and Xu, Danfei},
  title     = {Uncertainty-driven {3D} {Gaussian} Splatting Active Mapping via Anisotropic Visibility Field},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5014--5026},
}
Revisiting Sparsity Constraint Under High-Rank Property in Partial Multi-Label Learning: Chongjie Si,

Yidan Cui,

Fuchao Yang,

Wei Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Si_2026_CVPR,
  author    = {Si, Chongjie and Cui, Yidan and Yang, Fuchao and Shen, Wei},
  title     = {Revisiting Sparsity Constraint Under High-Rank Property in Partial Multi-Label Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17724--17733},
}
One-Step Diffusion Transformer for Controllable Real-World Image Super-Resolution: Yushun Fang,

Yuxiang Chen,

Shibo Yin,

Qiang Hu,

Jiangchao Yao,

Ya Zhang,

Xiaoyun Zhang,

Yanfeng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Yushun and Chen, Yuxiang and Yin, Shibo and Hu, Qiang and Yao, Jiangchao and Zhang, Ya and Zhang, Xiaoyun and Wang, Yanfeng},
  title     = {One-Step Diffusion Transformer for Controllable Real-World Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23440--23450},
}
Radiance Meshes for Volumetric Reconstruction: Alexander Mai,

Trevor Hedstrom,

George Kopanas,

Janne Kontkanen,

Falko Kuester,

Jonathan T. Barron; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mai_2026_CVPR,
  author    = {Mai, Alexander and Hedstrom, Trevor and Kopanas, George and Kontkanen, Janne and Kuester, Falko and Barron, Jonathan T.},
  title     = {Radiance Meshes for Volumetric Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8267--8277},
}
Unified Primitive Proxies for Structured Shape Completion: Zhaiyu Chen,

Yuqing Wang,

Xiao Xiang Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhaiyu and Wang, Yuqing and Zhu, Xiao Xiang},
  title     = {Unified Primitive Proxies for Structured Shape Completion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7457--7467},
}
Reconstructing Spiking Neural Networks Using a Single Neuron with Autapses: Wuque Cai,

Hongze Sun,

Quan Tang,

Shifeng Mao,

Zhenxing Wang,

Jiayi He,

Duo Chen,

Dezhong Yao,

Daqing Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Wuque and Sun, Hongze and Tang, Quan and Mao, Shifeng and Wang, Zhenxing and He, Jiayi and Chen, Duo and Yao, Dezhong and Guo, Daqing},
  title     = {Reconstructing Spiking Neural Networks Using a Single Neuron with Autapses},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20283--20292},
}
IsoCLIP: Decomposing CLIP Projectors for Efficient Intra-modal Alignment: Simone Magistri,

Dipam Goswami,

Marco Mistretta,

Bartłomiej Twardowski,

Joost van de Weijer,

Andrew D. Bagdanov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Magistri_2026_CVPR,
  author    = {Magistri, Simone and Goswami, Dipam and Mistretta, Marco and Twardowski, Bart{\l}omiej and van de Weijer, Joost and Bagdanov, Andrew D.},
  title     = {{IsoCLIP}: Decomposing {CLIP} Projectors for Efficient Intra-modal Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29315--29324},
}
RISE: Single Static Radar-based Indoor Scene Understanding: Kaichen Zhou,

Laura Dodds,

Sayed Saad Afzal,

Fadel Adib; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Kaichen and Dodds, Laura and Afzal, Sayed Saad and Adib, Fadel},
  title     = {{RISE}: Single Static Radar-based Indoor Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32194--32205},
}
SARL-STG: A Spatially Aware Reinforcement Learning Framework for Refining MLLMs in Spatio-Temporal Video Grounding: Hong Gao,

Xiangkai Xu,

Bin Zhong,

Junjie Yin,

Fangyu Kang,

Yutong Xu,

Xiugang Dong,

Xurui Gao,

Min-Ling Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Hong and Xu, Xiangkai and Zhong, Bin and Yin, Junjie and Kang, Fangyu and Xu, Yutong and Dong, Xiugang and Gao, Xurui and Zhang, Min-Ling},
  title     = {{SARL-STG}: A Spatially Aware Reinforcement Learning Framework for Refining {MLLMs} in Spatio-Temporal Video Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24630--24639},
}
FlowDIS: Language-Guided Dichotomous Image Segmentation with Flow Matching: Andranik Sargsyan,

Shant Navasardyan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sargsyan_2026_CVPR,
  author    = {Sargsyan, Andranik and Navasardyan, Shant},
  title     = {{FlowDIS}: Language-Guided Dichotomous Image Segmentation with Flow Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42039--42048},
}
ColorFLUX: A Structure-Color Decoupling Framework for Old Photo Colorization: Bingchen Li,

Zhixin Wang,

Fan Li,

Jiaqi Xu,

Jiaming Guo,

Renjing Pei,

Xin Li,

Zhibo Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Bingchen and Wang, Zhixin and Li, Fan and Xu, Jiaqi and Guo, Jiaming and Pei, Renjing and Li, Xin and Chen, Zhibo},
  title     = {{ColorFLUX}: A Structure-Color Decoupling Framework for Old Photo Colorization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15574--15584},
}
Cross-Subject EEG-to-Video Reconstruction and Beyond: Runduo Han,

Hongchen Tan; [pdf]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Runduo and Tan, Hongchen},
  title     = {Cross-Subject {EEG}-to-Video Reconstruction and Beyond},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23294--23303},
}
MeteorPred: A Meteorological Multimodal Large Model and Dataset for Severe Weather Event Prediction: Shuo Tang,

Jian Xu,

Jiadong Zhang,

Yi Chen,

Qizhao Jin,

Lingdong Shen,

Chenglin Liu,

Shiming Xiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Shuo and Xu, Jian and Zhang, Jiadong and Chen, Yi and Jin, Qizhao and Shen, Lingdong and Liu, Chenglin and Xiang, Shiming},
  title     = {{MeteorPred}: A Meteorological Multimodal Large Model and Dataset for Severe Weather Event Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22910--22919},
}
SkyReels-Text: Fine-Grained Font-Controllable Text Editing for Poster Design: Yunjie Yu,

Jingchen Wu,

Junchen Zhu,

Chunze Lin,

Guibin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Yunjie and Wu, Jingchen and Zhu, Junchen and Lin, Chunze and Chen, Guibin},
  title     = {{SkyReels-Text}: Fine-Grained Font-Controllable Text Editing for Poster Design},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14875--14884},
}
SIGMA: A Physics-Based Benchmark for Gas Chimney Understanding in Seismic Images: Bao Truong,

Quang Nguyen,

Baoru Huang,

Jinpei Han,

Van Nguyen,

Ngan Le,

Minh-Tan Pham,

Doan Huy Hien,

Anh Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Truong_2026_CVPR,
  author    = {Truong, Bao and Nguyen, Quang and Huang, Baoru and Han, Jinpei and Nguyen, Van and Le, Ngan and Pham, Minh-Tan and Hien, Doan Huy and Nguyen, Anh},
  title     = {{SIGMA}: A Physics-Based Benchmark for Gas Chimney Understanding in Seismic Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20542--20552},
}
TeFlow: Enabling Multi-frame Supervision for Self-Supervised Feed-forward Scene Flow Estimation: Qingwen Zhang,

Chenhan Jiang,

Xiaomeng Zhu,

Yunqi Miao,

Yushan Zhang,

Olov Andersson,

Patric Jensfelt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Qingwen and Jiang, Chenhan and Zhu, Xiaomeng and Miao, Yunqi and Zhang, Yushan and Andersson, Olov and Jensfelt, Patric},
  title     = {{TeFlow}: Enabling Multi-frame Supervision for Self-Supervised Feed-forward Scene Flow Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3667--3676},
}
Batch Loss Score for Dynamic Data Pruning: Qing Zhou,

Bingxuan Zhao,

Tao Yang,

Hongyuan Zhang,

Junyu Gao,

Qi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Qing and Zhao, Bingxuan and Yang, Tao and Zhang, Hongyuan and Gao, Junyu and Wang, Qi},
  title     = {Batch Loss Score for Dynamic Data Pruning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6188--6197},
}
OmniFood8K: Single-Image Nutrition Estimation via Hierarchical Frequency-Aligned Fusion: Dongjian Yu,

Weiqing Min,

Qian Jiang,

Xing Lin,

Xin Jin,

Shuqiang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Dongjian and Min, Weiqing and Jiang, Qian and Lin, Xing and Jin, Xin and Jiang, Shuqiang},
  title     = {{OmniFood8K}: Single-Image Nutrition Estimation via Hierarchical Frequency-Aligned Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41562--41572},
}
SketchFaceGS: Real-Time Sketch-Driven Face Editing and Generation with Gaussian Splatting: Bo Li,

Jiahao Kang,

Yubo Ma,

Feng-Lin Liu,

Bin Liu,

Fang-Lue Zhang,

Lin Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Bo and Kang, Jiahao and Ma, Yubo and Liu, Feng-Lin and Liu, Bin and Zhang, Fang-Lue and Gao, Lin},
  title     = {{SketchFaceGS}: Real-Time Sketch-Driven Face Editing and Generation with {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40020--40030},
}
DeepfakeImpact: A Two-Stage Benchmark with Real-World Impact in Deepfake Detection: Chaoyu Gong,

Han Zhang,

Siqiang Luo; [pdf]
[bibtex]
@InProceedings{Gong_2026_CVPR,
  author    = {Gong, Chaoyu and Zhang, Han and Luo, Siqiang},
  title     = {{DeepfakeImpact}: A Two-Stage Benchmark with Real-World Impact in Deepfake Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35451--35461},
}
Spatio-Temporal Conditional Denoising Transformer for Modality-Missing RGBT Tracking: Andong Lu,

Ziyi Zha,

Jiandong Jin,

Shihao Li,

Chenglong Li,

Jin Tang,

Bin Luo; [pdf]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Andong and Zha, Ziyi and Jin, Jiandong and Li, Shihao and Li, Chenglong and Tang, Jin and Luo, Bin},
  title     = {Spatio-Temporal Conditional Denoising Transformer for Modality-Missing {RGBT} Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13584--13593},
}
Learning Like Humans: Analogical Concept Learning for Generalized Category Discovery: Jizhou Han,

Chenhao Ding,

Yuhang He,

Qiang Wang,

Shaokun Wang,

SongLin Dong,

Yihong Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Jizhou and Ding, Chenhao and He, Yuhang and Wang, Qiang and Wang, Shaokun and Dong, SongLin and Gong, Yihong},
  title     = {Learning Like Humans: Analogical Concept Learning for Generalized Category Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28734--28743},
}
Keep It Frozen: Domain-Routed Conditional Residual Modulation for Multi-Domain Vision Transformers: Ufaq Khan,

Umair Nawaz,

Massimo Caputo,

Muhammad Bilal,

Junaid Qadir,

Muhammad Haris Khan; [pdf] [supp]
[bibtex]
@InProceedings{Khan_2026_CVPR,
  author    = {Khan, Ufaq and Nawaz, Umair and Caputo, Massimo and Bilal, Muhammad and Qadir, Junaid and Khan, Muhammad Haris},
  title     = {Keep It Frozen: Domain-Routed Conditional Residual Modulation for Multi-Domain Vision Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21016--21025},
}
Logit-Margin Repulsion for Backdoor Defense: Zhiguo Yang,

Dongsheng Xu,

Ruizhi Zhong,

Jiacheng Pi,

Xingxing Huang,

Wenjie Ruan; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zhiguo and Xu, Dongsheng and Zhong, Ruizhi and Pi, Jiacheng and Huang, Xingxing and Ruan, Wenjie},
  title     = {Logit-Margin Repulsion for Backdoor Defense},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34918--34928},
}
Compositional Text-to-Image Generation Via Region-aware Bimodal Direct Preference Optimization: Zhuohan Liu,

Wujian Peng,

Yitong Chen,

Zuxuan Wu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zhuohan and Peng, Wujian and Chen, Yitong and Wu, Zuxuan},
  title     = {Compositional Text-to-Image Generation Via Region-aware Bimodal Direct Preference Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36604--36614},
}
Agentic Video Summarization via Self-Reflecting Multimodal Understanding: Miaotian Guo,

Shuguang Dou,

Yin Li,

Aidong Men,

Dongsheng Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Miaotian and Dou, Shuguang and Li, Yin and Men, Aidong and Jiang, Dongsheng},
  title     = {Agentic Video Summarization via Self-Reflecting Multimodal Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40516--40526},
}
SignPR: A Progressive Vector-Quantized Diffusion Framework for Sign Language Production: Xiao Liu,

Shiwei Gan,

Yafeng Yin,

Bowen Guo,

Zhiwei Jiang,

Shunmei Meng,

Lei Xie,

Sanglu Lu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xiao and Gan, Shiwei and Yin, Yafeng and Guo, Bowen and Jiang, Zhiwei and Meng, Shunmei and Xie, Lei and Lu, Sanglu},
  title     = {{SignPR}: A Progressive Vector-Quantized Diffusion Framework for Sign Language Production},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2198--2208},
}
WorldMM: Dynamic Multimodal Memory Agent for Long Video Reasoning: Woongyeong Yeo,

Kangsan Kim,

Jaehong Yoon,

Sung Ju Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yeo_2026_CVPR,
  author    = {Yeo, Woongyeong and Kim, Kangsan and Yoon, Jaehong and Hwang, Sung Ju},
  title     = {{WorldMM}: Dynamic Multimodal Memory Agent for Long Video Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25599--25609},
}
GaussianPile: A Unified Sparse Gaussian Splatting Framework for Slice-based Volumetric Reconstruction: Di Kong,

Yikai Wang,

Wenjie Guo,

Yifan Bu,

Boya Zhang,

Yuexin Duan,

Xiawei Yue,

Wenbiao Du,

Yiman Zhong,

Yuwen Chen,

Cheng Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Di and Wang, Yikai and Guo, Wenjie and Bu, Yifan and Zhang, Boya and Duan, Yuexin and Yue, Xiawei and Du, Wenbiao and Zhong, Yiman and Chen, Yuwen and Ma, Cheng},
  title     = {{GaussianPile}: A Unified Sparse {Gaussian} Splatting Framework for Slice-based Volumetric Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19022--19032},
}
Grid Distillation: Compositional Image Distillation via Structured Generative Grids: Biplab Ch Das,

Shouvik Das,

Viswanath Gopalakrishnan; [pdf] [supp]
[bibtex]
@InProceedings{Das_2026_CVPR,
  author    = {Das, Biplab Ch and Das, Shouvik and Gopalakrishnan, Viswanath},
  title     = {Grid Distillation: Compositional Image Distillation via Structured Generative Grids},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19644--19653},
}
SpotEdit: Selective Region Editing in Diffusion Transformers: Zhibin Qin,

Zhenxiong Tan,

Zeqing Wang,

Songhua Liu,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2026_CVPR,
    author    = {Qin, Zhibin and Tan, Zhenxiong and Wang, Zeqing and Liu, Songhua and Wang, Xinchao},
    title     = {{SpotEdit}: Selective Region Editing in Diffusion Transformers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {18683--18692}
}
Easy2Hard: From Partially to Fully Unmatched Modalities as Negative Samples in Contrastive Learning: Zhicheng Yang,

Yichen Liu,

Chang Ge,

Xiaopeng Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Zhicheng and Liu, Yichen and Ge, Chang and Jiang, Xiaopeng},
    title     = {{Easy2Hard}: From Partially to Fully Unmatched Modalities as Negative Samples in Contrastive Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30226--30234}
}
Revisiting the Necessity of Full Accuracy: Weakly Supervised Object-Level Offset Correction for Misaligned Building Labels: Junda Xu,

Yanmeng Liu,

Xiangqiang Zeng,

Jinrong Wu,

Ying Qu,

Libao Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Junda and Liu, Yanmeng and Zeng, Xiangqiang and Wu, Jinrong and Qu, Ying and Zhang, Libao},
    title     = {Revisiting the Necessity of Full Accuracy: Weakly Supervised Object-Level Offset Correction for Misaligned Building Labels},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {34854--34864}
}
EgoPoseFormer v2: Accurate Egocentric Human Motion Estimation for AR/VR: Zhenyu Li,

Sai Kumar Dwivedi,

Filip Maric,

Carlos Chacón,

Nadine Bertsch,

Filippo Arcadu,

Tomas Hodan,

Michael Ramamonjisoa,

Peter Wonka,

Amy Zhao,

Robin Kips,

Cem Keskin,

Anastasia Tkach,

Chenhongyi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Zhenyu and Dwivedi, Sai Kumar and Maric, Filip and Chac{\'o}n, Carlos and Bertsch, Nadine and Arcadu, Filippo and Hodan, Tomas and Ramamonjisoa, Michael and Wonka, Peter and Zhao, Amy and Kips, Robin and Keskin, Cem and Tkach, Anastasia and Yang, Chenhongyi},
    title     = {{EgoPoseFormer} v2: Accurate Egocentric Human Motion Estimation for {AR/VR}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {21121--21131}
}
Skullptor: High Fidelity 3D Head Reconstruction in Seconds with Multi-View Normal Prediction: Noé Artru,

Rukhshanda Hussain,

Emeline Got,

Alexandre Messier,

David B. Lindell,

Abdallah Dib; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Artru_2026_CVPR,
    author    = {Artru, No{\'e} and Hussain, Rukhshanda and Got, Emeline and Messier, Alexandre and Lindell, David B. and Dib, Abdallah},
    title     = {Skullptor: High Fidelity {3D} Head Reconstruction in Seconds with Multi-View Normal Prediction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25248--25257}
}
PureProof: Diffusion-Resistant Black-box Targeted Attack on Large Vision-Language Models: Yiming Cao,

Dong Wang,

Xinqi Lyu,

Bin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2026_CVPR,
    author    = {Cao, Yiming and Wang, Dong and Lyu, Xinqi and Xiao, Bin},
    title     = {{PureProof}: Diffusion-Resistant Black-box Targeted Attack on Large Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {8621--8630}
}
ODGS-SLAM: Omnidirectional Gaussian Splatting SLAM: Stefan Spiss,

Joey Hieronimy,

Marcel Ritter,

Matthias Harders; [pdf] [supp]
[bibtex]
@InProceedings{Spiss_2026_CVPR,
    author    = {Spiss, Stefan and Hieronimy, Joey and Ritter, Marcel and Harders, Matthias},
    title     = {{ODGS-SLAM}: Omnidirectional {Gaussian} Splatting {SLAM}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26114--26123}
}
Factorize, Reconstruct, Enhance: A Unified Framework for Multimodal Sentiment Analysis: Zhilu Yang,

Mingcheng Li; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
    author    = {Yang, Zhilu and Li, Mingcheng},
    title     = {Factorize, Reconstruct, Enhance: A Unified Framework for Multimodal Sentiment Analysis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15860--15869}
}
LaS-Comp: Zero-shot 3D Completion with Latent-Spatial Consistency: Weilong Yan,

Haipeng Li,

Hao Xu,

Nianjin Ye,

Yihao Ai,

Shuaicheng Liu,

Jingyu Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
    author    = {Yan, Weilong and Li, Haipeng and Xu, Hao and Ye, Nianjin and Ai, Yihao and Liu, Shuaicheng and Hu, Jingyu},
    title     = {{LaS-Comp}: Zero-shot {3D} Completion with Latent-Spatial Consistency},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {7588--7599}
}
EchoVDiff: Cardiac-Cycle Echocardiography Video Generation from Arbitrary Single Frame: Jiansong Zhang,

Xiaying Yang,

Xiaoling Luo,

Linlin Shen; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Jiansong and Yang, Xiaying and Luo, Xiaoling and Shen, Linlin},
    title     = {{EchoVDiff}: Cardiac-Cycle Echocardiography Video Generation from Arbitrary Single Frame},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {9040--9050}
}
InterPhys: Physics-aware Human Motion Synthesis in a Dynamic Scene: Chaoyue Xing,

Wei Mao,

Miaomiao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2026_CVPR,
    author    = {Xing, Chaoyue and Mao, Wei and Liu, Miaomiao},
    title     = {{InterPhys}: Physics-aware Human Motion Synthesis in a Dynamic Scene},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {30729--30739}
}
GS-ASM: 2DGS-Supervised Active Stereo Matching: Zhengling Wu,

Rongfeng Lu,

Quan Chen,

Longjian Zeng,

Ming Lu,

Yaoqi Sun,

Yahong Chen,

Baofeng Ji,

Chenggang Yan; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
    author    = {Wu, Zhengling and Lu, Rongfeng and Chen, Quan and Zeng, Longjian and Lu, Ming and Sun, Yaoqi and Chen, Yahong and Ji, Baofeng and Yan, Chenggang},
    title     = {{GS-ASM}: {2DGS-Supervised} Active Stereo Matching},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26888--26898}
}
Consensus Entropy: Harnessing Multi-VLM Agreement for Self-Verifying and Self-Improving OCR: Yulong Zhang,

Tianyi Liang,

Erfei Cui,

Guoqing Wang,

Xu Guo,

Chenhui Li,

Gongshen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Yulong and Liang, Tianyi and Cui, Erfei and Wang, Guoqing and Guo, Xu and Li, Chenhui and Liu, Gongshen},
    title     = {Consensus Entropy: Harnessing {Multi-VLM} Agreement for Self-Verifying and Self-Improving {OCR}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11643--11653}
}
Taming Noise-Induced Prototype Degradation for Privacy-Preserving Personalized Federated Fine-Tuning: Yuhua Wang,

Qinnan Zhang,

Xiaodong Li,

Huan Zhang,

Yifan Sun,

Wangjie Qiu,

Hainan Zhang,

Yongxin Tong,

Zhiming Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yuhua and Zhang, Qinnan and Li, Xiaodong and Zhang, Huan and Sun, Yifan and Qiu, Wangjie and Zhang, Hainan and Tong, Yongxin and Zheng, Zhiming},
    title     = {Taming Noise-Induced Prototype Degradation for Privacy-Preserving Personalized Federated Fine-Tuning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39414--39424}
}
AIMDepth: Asymmetric Image-Event Mamba for Monocular Depth Estimation: Luoxi Jing,

Dianxi Shi,

Yushe Cao,

Yuanze Wang,

Junze Zhang,

Yuning Cui,

Mengzhu Wang; [pdf]
[bibtex]
@InProceedings{Jing_2026_CVPR,
    author    = {Jing, Luoxi and Shi, Dianxi and Cao, Yushe and Wang, Yuanze and Zhang, Junze and Cui, Yuning and Wang, Mengzhu},
    title     = {{AIMDepth}: Asymmetric Image-Event {Mamba} for Monocular Depth Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {8033--8044}
}
Interpretable and Steerable Concept Bottleneck Sparse Autoencoders: Akshay Kulkarni,

Tsui-Wei Weng,

Vivek Narayanaswamy,

Shusen Liu,

Wesam A. Sakla,

Kowshik Thopalli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kulkarni_2026_CVPR,
    author    = {Kulkarni, Akshay and Weng, Tsui-Wei and Narayanaswamy, Vivek and Liu, Shusen and Sakla, Wesam A. and Thopalli, Kowshik},
    title     = {Interpretable and Steerable Concept Bottleneck Sparse Autoencoders},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {2918--2927}
}
EV-CGNet: Co-visible Focused 3D-guided 2D Event Keypoint Detection Network: Yuan Gao,

Tianle Ding,

Yuqing Zhu,

Tianzhu Zhang; [pdf]
[bibtex]
@InProceedings{Gao_2026_CVPR,
    author    = {Gao, Yuan and Ding, Tianle and Zhu, Yuqing and Zhang, Tianzhu},
    title     = {{EV-CGNet}: Co-visible Focused {3D-guided} {2D} Event Keypoint Detection Network},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15105--15114}
}
Meta-FC: Meta-Learning with Feature Consistency for Robust and Generalizable Watermarking: Yuheng Li,

Weitong Chen,

Chengcheng Zhu,

Jiale Zhang,

Chunpeng Ge,

Di Wu,

Guodong Long; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Yuheng and Chen, Weitong and Zhu, Chengcheng and Zhang, Jiale and Ge, Chunpeng and Wu, Di and Long, Guodong},
    title     = {{Meta-FC}: Meta-Learning with Feature Consistency for Robust and Generalizable Watermarking},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {17420--17429}
}
Token Warping Helps MLLMs Look from Nearby Viewpoints: Phillip Y. Lee,

Chanho Park,

Mingue Park,

Seungwoo Yoo,

Juil Koo,

Minhyuk Sung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, Phillip Y. and Park, Chanho and Park, Mingue and Yoo, Seungwoo and Koo, Juil and Sung, Minhyuk},
    title     = {Token Warping Helps {MLLMs} Look from Nearby Viewpoints},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {3476--3488}
}
Predictive Regularization Against Visual Representation Degradation in Multimodal Large Language Models: Enguang Wang,

Qiang Wang,

Yuanchen Wu,

Ke Yan,

Xinbin Yuan,

Shouhong Ding,

Xialei Liu,

Ming-Ming Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Enguang and Wang, Qiang and Wu, Yuanchen and Yan, Ke and Yuan, Xinbin and Ding, Shouhong and Liu, Xialei and Cheng, Ming-Ming},
    title     = {Predictive Regularization Against Visual Representation Degradation in Multimodal Large Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {8813--8824}
}
MakeAnything: Harnessing Diffusion Transformers for Multi-Domain Procedural Sequence Generation: Yiren Song,

Cheng Liu,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
    author    = {Song, Yiren and Liu, Cheng and Shou, Mike Zheng},
    title     = {{MakeAnything}: Harnessing Diffusion Transformers for Multi-Domain Procedural Sequence Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11482--11492}
}
Depth Peeling for High-Fidelity Gaussian-Enhanced Surfel Rendering: Keyang Ye,

Hongzhi Wu,

Kun Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR,
    author    = {Ye, Keyang and Wu, Hongzhi and Zhou, Kun},
    title     = {Depth Peeling for High-Fidelity {Gaussian-Enhanced} Surfel Rendering},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {22561--22570}
}
Learning Mutual View Information Graph for Adaptive Adversarial Collaborative Perception: Yihang Tao,

Senkang Hu,

Haonan An,

Zhengru Fang,

Hangcheng Cao,

Yuguang Fang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2026_CVPR,
    author    = {Tao, Yihang and Hu, Senkang and An, Haonan and Fang, Zhengru and Cao, Hangcheng and Fang, Yuguang},
    title     = {Learning Mutual View Information Graph for Adaptive Adversarial Collaborative Perception},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42321--42330}
}
Hierarchical Point-Patch Fusion with Adaptive Patch Codebook for 3D Shape Anomaly Detection: Xueyang Kang,

Zizhao Li,

Tian Lan,

Dong Gong,

Kourosh Khoshelham,

Liangliang Nan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
    author    = {Kang, Xueyang and Li, Zizhao and Lan, Tian and Gong, Dong and Khoshelham, Kourosh and Nan, Liangliang},
    title     = {Hierarchical Point-Patch Fusion with Adaptive Patch Codebook for {3D} Shape Anomaly Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24258--24267}
}
Memory-Efficient Fine-Tuning Diffusion Transformers via Dynamic Patch Sampling and Block Skipping: Sunghyun Park,

Jeongho Kim,

Hyoungwoo Park,

Debasmit Das,

Sungrack Yun,

Munawar Hayat,

Jaegul Choo,

Fatih Porikli,

Seokeon Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
    author    = {Park, Sunghyun and Kim, Jeongho and Park, Hyoungwoo and Das, Debasmit and Yun, Sungrack and Hayat, Munawar and Choo, Jaegul and Porikli, Fatih and Choi, Seokeon},
    title     = {Memory-Efficient Fine-Tuning Diffusion Transformers via Dynamic Patch Sampling and Block Skipping},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11504--11514}
}
Confidence-Guided Multi-Scale Aggregation for Sparse-View High-Resolution 3D Gaussian Splatting: Qinzheng Zhou,

Zaychik Liu,

Lijing Lu,

Zhihang Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Qinzheng and Liu, Zaychik and Lu, Lijing and Li, Zhihang},
    title     = {Confidence-Guided Multi-Scale Aggregation for Sparse-View High-Resolution {3D} {Gaussian} Splatting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {19054--19064}
}
pH-Strips for Selective Forgetting: A Blunt but Fast Diagnostic Baseline for Machine Unlearning: Chengyao Qian,

Jing Wu,

Trung Le,

Dinh Phung,

Mehrtash Harandi; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2026_CVPR,
    author    = {Qian, Chengyao and Wu, Jing and Le, Trung and Phung, Dinh and Harandi, Mehrtash},
    title     = {{pH-Strips} for Selective Forgetting: A Blunt but Fast Diagnostic Baseline for Machine Unlearning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {3306--3315}
}
ActAvatar: Temporally-Aware Precise Action Control for Talking Avatars: Ziqiao Peng,

Yi Chen,

Yifeng Ma,

Guozhen Zhang,

Zhiyao Sun,

Zixiang Zhou,

Youliang Zhang,

Zhengguang Zhou,

Zhaoxin Fan,

Hongyan Liu,

Yuan Zhou,

Qinglin Lu,

Jun He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
    author    = {Peng, Ziqiao and Chen, Yi and Ma, Yifeng and Zhang, Guozhen and Sun, Zhiyao and Zhou, Zixiang and Zhang, Youliang and Zhou, Zhengguang and Fan, Zhaoxin and Liu, Hongyan and Zhou, Yuan and Lu, Qinglin and He, Jun},
    title     = {{ActAvatar}: Temporally-Aware Precise Action Control for Talking Avatars},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39983--39993}
}
Enhancing Video Vision Language Model with Hippocampal Sensing: Xu Cao; [pdf]
[bibtex]
@InProceedings{Cao_2026_CVPR,
    author    = {Cao, Xu},
    title     = {Enhancing Video Vision Language Model with Hippocampal Sensing},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {33682--33692}
}
EgoMind: Activating Spatial Cognition through Linguistic Reasoning in MLLMs: Zhenghao Chen,

Huiqun Wang,

Di Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Zhenghao and Wang, Huiqun and Huang, Di},
    title     = {{EgoMind}: Activating Spatial Cognition through Linguistic Reasoning in {MLLMs}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38616--38626}
}
COPE: Consistent Occlusion and Prompt Enhancement Network for Occluded Person Re-identification: Siyi Sun,

Jinliang Lin,

Juanjuan Weng,

Zhihui Liu,

Shaozi Li,

Zhiming Luo; [pdf]
[bibtex]
@InProceedings{Sun_2026_CVPR,
    author    = {Sun, Siyi and Lin, Jinliang and Weng, Juanjuan and Liu, Zhihui and Li, Shaozi and Luo, Zhiming},
    title     = {{COPE}: Consistent Occlusion and Prompt Enhancement Network for Occluded Person Re-identification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {11293--11302}
}
SPREAD: Spatial-Physical REasoning via geometry Aware Diffusion: Minzhang Li,

Kuixiang Shao,

Xuebing Li,

Yuyang Jiao,

Yinuo Bai,

Hengan Zhou,

Sixian Shen,

Jiayuan Gu,

Jingyi Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Minzhang and Shao, Kuixiang and Li, Xuebing and Jiao, Yuyang and Bai, Yinuo and Zhou, Hengan and Shen, Sixian and Gu, Jiayuan and Yu, Jingyi},
    title     = {{SPREAD}: Spatial-Physical {REasoning} via geometry Aware Diffusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {31008--31018}
}
DMAligner: Enhancing Image Alignment via Diffusion Model Based View Synthesis: Xinglong Luo,

Ao Luo,

Zhengning Wang,

Yueqi Yang,

Chaoyu Feng,

Lei Lei,

Bing Zeng,

Shuaicheng Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
    author    = {Luo, Xinglong and Luo, Ao and Wang, Zhengning and Yang, Yueqi and Feng, Chaoyu and Lei, Lei and Zeng, Bing and Liu, Shuaicheng},
    title     = {{DMAligner}: Enhancing Image Alignment via Diffusion Model Based View Synthesis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16541--16550}
}
ReflexSplit: Single Image Reflection Separation via Layer Fusion-Separation: Chia-Ming Lee,

Yu-Fan Lin,

Jin-Hui Jiang,

Yu-Jou Hsiao,

Chih-Chung Hsu,

Yu-Lun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, Chia-Ming and Lin, Yu-Fan and Jiang, Jin-Hui and Hsiao, Yu-Jou and Hsu, Chih-Chung and Liu, Yu-Lun},
    title     = {{ReflexSplit}: Single Image Reflection Separation via Layer Fusion-Separation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1300--1309}
}
SHands: A Multi-View Dataset and Benchmark for Surgical Hand-Gesture and Error Recognition Toward Medical Training: Le Ma,

Thiago Freitas dos Santos,

Nadia Magnenat-Thalmann,

Katarzyna Wac; [pdf]
[bibtex]
@InProceedings{Ma_2026_CVPR,
    author    = {Ma, Le and dos Santos, Thiago Freitas and Magnenat-Thalmann, Nadia and Wac, Katarzyna},
    title     = {{SHands}: A Multi-View Dataset and Benchmark for Surgical Hand-Gesture and Error Recognition Toward Medical Training},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {42879--42890}
}
Retrieving Counterfactuals Improves Visual In-Context Learning: Guangzhi Xiong,

Sanchit Sinha,

Zhenghao He,

Aidong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
    author    = {Xiong, Guangzhi and Sinha, Sanchit and He, Zhenghao and Zhang, Aidong},
    title     = {Retrieving Counterfactuals Improves Visual In-Context Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {24352--24362}
}
Shedding Light on VLN Robustness: A Black-box Framework for Indoor Lighting-based Adversarial Attack: Chenyang Li,

Wenbing Tang,

Yihao Huang,

Sinong Simon Zhan,

Ming Hu,

Xiaojun Jia,

Yang Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Chenyang and Tang, Wenbing and Huang, Yihao and Zhan, Sinong Simon and Hu, Ming and Jia, Xiaojun and Liu, Yang},
    title     = {Shedding Light on {VLN} Robustness: A Black-box Framework for Indoor Lighting-based Adversarial Attack},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {1565--1574}
}
LeapAlign: Post-training Flow Matching Models at Any Generation Step by Building Two-Step Trajectories: Zhanhao Liang,

Tao Yang,

Jie Wu,

Chengjian Feng,

Liang Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
    author    = {Liang, Zhanhao and Yang, Tao and Wu, Jie and Feng, Chengjian and Zheng, Liang},
    title     = {{LeapAlign}: Post-training Flow Matching Models at Any Generation Step by Building Two-Step Trajectories},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {23238--23248}
}
EmbodiedSplat: Online Feed-Forward Semantic 3DGS for Open-Vocabulary 3D Scene Understanding: Seungjun Lee,

Zihan Wang,

Yunsong Wang,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
    author    = {Lee, Seungjun and Wang, Zihan and Wang, Yunsong and Lee, Gim Hee},
    title     = {{EmbodiedSplat}: Online Feed-Forward Semantic {3DGS} for Open-Vocabulary {3D} Scene Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {23774--23784}
}
From Infusion to Assimilation Distillation for Medical Image Segmentation: Jiankang Hong,

Ye Luo,

Yinan Liu,

Junsong Yuan; [pdf]
[bibtex]
@InProceedings{Hong_2026_CVPR,
    author    = {Hong, Jiankang and Luo, Ye and Liu, Yinan and Yuan, Junsong},
    title     = {From Infusion to Assimilation Distillation for Medical Image Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {20985--20995}
}
UniGenDet: A Unified Generative-Discriminative Framework for Co-Evolutionary Image Generation and Generated Image Detection: Yanran Zhang,

Wenzhao Zheng,

Yifei Li,

Bingyao Yu,

Yu Zheng,

Lei Chen,

Jiwen Lu,

Jie Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Yanran and Zheng, Wenzhao and Li, Yifei and Yu, Bingyao and Zheng, Yu and Chen, Lei and Lu, Jiwen and Zhou, Jie},
    title     = {{UniGenDet}: A Unified Generative-Discriminative Framework for Co-Evolutionary Image Generation and Generated Image Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {16226--16236}
}
Geometry-as-context: Modulating Explicit 3D in Scene-consistent Video Generation to Geometry Context: JiaKui Hu,

Jialun Liu,

Liying Yang,

Xinliang Zhang,

Kaiwen Li,

Shuang Zeng,

Yuanwei Li,

Haibin Huang,

Chi Zhang,

Yanye Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
    author    = {Hu, JiaKui and Liu, Jialun and Yang, Liying and Zhang, Xinliang and Li, Kaiwen and Zeng, Shuang and Li, Yuanwei and Huang, Haibin and Zhang, Chi and Lu, Yanye},
    title     = {Geometry-as-context: Modulating Explicit {3D} in Scene-consistent Video Generation to Geometry Context},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {4258--4268}
}
Changes in Real Time: Online Scene Change Detection with Multi-View Fusion: Chamuditha Jayanga Galappaththige,

Jason Lai,

Lloyd Windrim,

Donald Dansereau,

Niko Suenderhauf,

Dimity Miller; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Galappaththige_2026_CVPR,
    author    = {Galappaththige, Chamuditha Jayanga and Lai, Jason and Windrim, Lloyd and Dansereau, Donald and Suenderhauf, Niko and Miller, Dimity},
    title     = {Changes in Real Time: Online Scene Change Detection with Multi-View Fusion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {32246--32256}
}
SABER: Spatially Consistent 3D Universal Adversarial Objects for BEV Detectors: Aixuan Li,

Mochu Xiang,

Bosen Hou,

Zhexiong Wan,

Jing Zhang,

Yuchao Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Aixuan and Xiang, Mochu and Hou, Bosen and Wan, Zhexiong and Zhang, Jing and Dai, Yuchao},
    title     = {{SABER}: Spatially Consistent {3D} Universal Adversarial Objects for {BEV} Detectors},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {25841--25850}
}
PAUL: Uncertainty-Guided Partition and Augmentation for Robust Cross-View Geo-Localization under Noisy Correspondence: Zheng Li,

Xueyi Zhang,

Yanming Guo,

Yuxiang Xie,

Zhaoyun Ding,

Siqi Cai,

Haizhou Li,

Mingrui Lao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Zheng and Zhang, Xueyi and Guo, Yanming and Xie, Yuxiang and Ding, Zhaoyun and Cai, Siqi and Li, Haizhou and Lao, Mingrui},
    title     = {{PAUL}: Uncertainty-Guided Partition and Augmentation for Robust Cross-View Geo-Localization under Noisy Correspondence},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {5389--5398}
}
Superman: Unifying Skeleton and Vision for Human Motion Perception and Generation: Xinshun Wang,

Peiming Li,

Ziyi Wang,

Zhongbin Fang,

Zhichao Deng,

Songtao Wu,

Jason Li,

Mengyuan Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Xinshun and Li, Peiming and Wang, Ziyi and Fang, Zhongbin and Deng, Zhichao and Wu, Songtao and Li, Jason and Liu, Mengyuan},
    title     = {Superman: Unifying Skeleton and Vision for Human Motion Perception and Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {38335--38344}
}
Controllable Federated Prompt Learning at Test Time: Rui Zhu,

Liang Bai,

Yanming Guo,

Yirun Ruan,

Tianyuan Yu,

Zhihe Lu; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Rui and Bai, Liang and Guo, Yanming and Ruan, Yirun and Yu, Tianyuan and Lu, Zhihe},
    title     = {Controllable Federated Prompt Learning at Test Time},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {39455--39465}
}
LightMover: Generative Light Movement with Color and Intensity Controls: Gengze Zhou,

Tianyu Wang,

Soo Ye Kim,

Zhixin Shu,

Xin Yu,

Yannick Hold-Geoffroy,

Sumit Chaturvedi,

Qi Wu,

Zhe Lin,

Scott Cohen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
    author    = {Zhou, Gengze and Wang, Tianyu and Kim, Soo Ye and Shu, Zhixin and Yu, Xin and Hold-Geoffroy, Yannick and Chaturvedi, Sumit and Wu, Qi and Lin, Zhe and Cohen, Scott},
    title     = {{LightMover}: Generative Light Movement with Color and Intensity Controls},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {8997--9007}
}
Diffusion Sampling Path Tells More: An Efficient Plug-and-Play Strategy for Sample Filtering: Sixian Wang,

Zhiwei Tang,

Tsung-Hui Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Sixian and Tang, Zhiwei and Chang, Tsung-Hui},
    title     = {Diffusion Sampling Path Tells More: An Efficient Plug-and-Play Strategy for Sample Filtering},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {36030--36039}
}
EMR-Diff: Edge-aware Multimodal Residual Diffusion Model for Hyperspectral Image Super-resolution: Tao Zhang,

Shengtao Yao,

Rong Zeng,

Zunjie Zhu,

Bolun Zheng,

Yaoqi Sun,

Ying Fu,

Chenggang Yan; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Tao and Yao, Shengtao and Zeng, Rong and Zhu, Zunjie and Zheng, Bolun and Sun, Yaoqi and Fu, Ying and Yan, Chenggang},
    title     = {{EMR-Diff}: Edge-aware Multimodal Residual Diffusion Model for Hyperspectral Image Super-resolution},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {23419--23429}
}
Group Editing: Edit Multiple Images in One Go: Yue Ma,

Xinyu Wang,

Qianli Ma,

Qinghe Wang,

Mingzhe Zheng,

Xiangpeng Yang,

Hao Li,

Chongbo Zhao,

Jixuan Ying,

Harry Yang,

Hongyu Liu,

Qifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
    author    = {Ma, Yue and Wang, Xinyu and Ma, Qianli and Wang, Qinghe and Zheng, Mingzhe and Yang, Xiangpeng and Li, Hao and Zhao, Chongbo and Ying, Jixuan and Yang, Harry and Liu, Hongyu and Chen, Qifeng},
    title     = {Group Editing: Edit Multiple Images in One Go},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {43418--43428}
}
I2I-Bench: A Comprehensive Benchmark Suite for Image-to-Image Editing Models: Juntong Wang,

Jiarui Wang,

Huiyu Duan,

Jiaxiang Kang,

Guangtao Zhai,

Xiongkuo Min; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Juntong and Wang, Jiarui and Duan, Huiyu and Kang, Jiaxiang and Zhai, Guangtao and Min, Xiongkuo},
    title     = {{I2I-Bench}: A Comprehensive Benchmark Suite for Image-to-Image Editing Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {15354--15364}
}
Lumosaic: Hyperspectral Video via Active Illumination and Coded-Exposure Pixels: Dhruv Verma,

Andrew Qiu,

Roberto Rangel,

Ayandev Barman,

Hao Yang,

Chenjia Hu,

Fengqi Zhang,

Roman Genov,

David B. Lindell,

Kiriakos N. Kutulakos,

Alex Mariakakis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Verma_2026_CVPR,
    author    = {Verma, Dhruv and Qiu, Andrew and Rangel, Roberto and Barman, Ayandev and Yang, Hao and Hu, Chenjia and Zhang, Fengqi and Genov, Roman and Lindell, David B. and Kutulakos, Kiriakos N. and Mariakakis, Alex},
    title     = {Lumosaic: Hyperspectral Video via Active Illumination and Coded-Exposure Pixels},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2026},
    pages     = {26761--26771}
}
HERO: Hierarchical Embedding-Refinement for Open-Vocabulary Temporal Sentence Grounding in Videos: Tingting Han,

Xinsong Tao,

Yufei Yin,

Min Tan,

Sicheng Zhao,

Zhou Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Tingting and Tao, Xinsong and Yin, Yufei and Tan, Min and Zhao, Sicheng and Yu, Zhou},
  title     = {{HERO}: Hierarchical Embedding-Refinement for Open-Vocabulary Temporal Sentence Grounding in Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31867--31876},
}
Conversational Image Segmentation: Grounding Abstract Concepts with Scalable Supervision: Aadarsh Sahoo,

Georgia Gkioxari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sahoo_2026_CVPR,
  author    = {Sahoo, Aadarsh and Gkioxari, Georgia},
  title     = {Conversational Image Segmentation: Grounding Abstract Concepts with Scalable Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39476--39485},
}
SAMosaic3D: Modular Scene Assembly for Real-Time 3D Segment Anything: Peng Wang,

Yongcai Wang,

Wang Chen,

Hualong Cao,

Kang Yang,

Chunxu Li,

Jie Wen,

Deying Li; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Peng and Wang, Yongcai and Chen, Wang and Cao, Hualong and Yang, Kang and Li, Chunxu and Wen, Jie and Li, Deying},
  title     = {{SAMosaic3D}: Modular Scene Assembly for Real-Time {3D} Segment Anything},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17894--17904},
}
InfinityHuman: Towards Long-Term Audio-Driven Human Animation: Xiaodi Li,

Pan Xie,

Yi Ren,

Qijun Gan,

Chen Zhang,

Fangyuan Kong,

Xiang Yin,

Zehuan Yuan,

Bingyue Peng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xiaodi and Xie, Pan and Ren, Yi and Gan, Qijun and Zhang, Chen and Kong, Fangyuan and Yin, Xiang and Yuan, Zehuan and Peng, Bingyue},
  title     = {{InfinityHuman}: Towards Long-Term Audio-Driven Human Animation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3978--3987},
}
Flat-Pack Bench: Evaluating Spatio-Temporal Understanding in Large Vision-Language Models through Furniture Assembly: Aditya Chetan,

Eric Cai,

Peeyush Kushwaha,

Bharath Raj Nagoor Kani,

Utkarsh Mall,

Qianqian Wang,

Noah Snavely,

Bharath Hariharan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chetan_2026_CVPR,
  author    = {Chetan, Aditya and Cai, Eric and Kushwaha, Peeyush and Kani, Bharath Raj Nagoor and Mall, Utkarsh and Wang, Qianqian and Snavely, Noah and Hariharan, Bharath},
  title     = {Flat-Pack Bench: Evaluating Spatio-Temporal Understanding in Large Vision-Language Models through Furniture Assembly},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16624--16634},
}
Demo2Tutorial: From Human Experience to Multimodal Software Tutorials: Zechen Bai,

Zhiheng Chen,

Yiqi Lin,

Kevin Qinghong Lin,

Difei Gao,

Xiangwu Guo,

Xin Wang,

Mike Zheng Shou; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Zechen and Chen, Zhiheng and Lin, Yiqi and Lin, Kevin Qinghong and Gao, Difei and Guo, Xiangwu and Wang, Xin and Shou, Mike Zheng},
  title     = {{Demo2Tutorial}: From Human Experience to Multimodal Software Tutorials},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29588--29597},
}
Reliable Clustering Number Estimation for Contrastive Multi-View Clustering: Zhengzhong Zhu,

Pei Zhou,

Lanxi Bai,

Li Cheng,

Jia Nie,

Shiquan Min,

Jiangping Zhu; [pdf]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Zhengzhong and Zhou, Pei and Bai, Lanxi and Cheng, Li and Nie, Jia and Min, Shiquan and Zhu, Jiangping},
  title     = {Reliable Clustering Number Estimation for Contrastive Multi-View Clustering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30162--30171},
}
VoDaSuRe: A Large-Scale Dataset Revealing Domain Shift in Volumetric Super-Resolution: August Leander Høeg,

Sophia Wiinberg Bardenfleth,

Hans Martin Kjer,

Tim Bjørn Dyrby,

Vedrana Andersen Dahl,

Anders Bjorholm Dahl; [pdf] [supp]
[bibtex]
@InProceedings{Hoeg_2026_CVPR,
  author    = {H{\o}eg, August Leander and Bardenfleth, Sophia Wiinberg and Kjer, Hans Martin and Dyrby, Tim Bj{\o}rn and Dahl, Vedrana Andersen and Dahl, Anders Bjorholm},
  title     = {{VoDaSuRe}: A Large-Scale Dataset Revealing Domain Shift in Volumetric Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2166--2176},
}
F$^2$-Assist: Multi-Phase Fetal Growth Forecast and Report Generation from Ultrasound Examination: Bin Pu,

Xusheng Liang,

Xinpeng Ding,

Jinlin Wu,

Zhen Lei,

Shengli Li,

Kenli Li,

Jiawei Ma; [pdf]
[bibtex]
@InProceedings{Pu_2026_CVPR,
  author    = {Pu, Bin and Liang, Xusheng and Ding, Xinpeng and Wu, Jinlin and Lei, Zhen and Li, Shengli and Li, Kenli and Ma, Jiawei},
  title     = {{F$^2$-Assist}: Multi-Phase Fetal Growth Forecast and Report Generation from Ultrasound Examination},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35341--35350},
}
Online3R: Online Learning for Consistent Sequential Reconstruction Based on Geometry Foundation Model: Shunkai Zhou,

Zike Yan,

Fei Xue,

Dong Wu,

Yuchen Deng,

Hongbin Zha; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Shunkai and Yan, Zike and Xue, Fei and Wu, Dong and Deng, Yuchen and Zha, Hongbin},
  title     = {{Online3R}: Online Learning for Consistent Sequential Reconstruction Based on Geometry Foundation Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36529--36538},
}
VDFE: Difference-Aware 3D Scene Editing with Non-Intrusive Video Diffusion Priors for Multi-View Consistency and Efficiency: Chao Zhang,

Fang Liu,

Shuo Li,

Yang Liu,

Jiahao Wang,

Xinyan Huang,

Lingling Li,

Puhua Chen,

Xu Liu,

Wenping Ma,

Siqi Yu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chao and Liu, Fang and Li, Shuo and Liu, Yang and Wang, Jiahao and Huang, Xinyan and Li, Lingling and Chen, Puhua and Liu, Xu and Ma, Wenping and Yu, Siqi},
  title     = {{VDFE}: Difference-Aware {3D} Scene Editing with Non-Intrusive Video Diffusion Priors for Multi-View Consistency and Efficiency},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18375--18385},
}
RigMo: Unifying Rig and Motion Learning for Generative Animation: Hao Zhang,

Jiahao Luo,

Bohui Wan,

Yizhou Zhao,

Zongrui Li,

Michael Vasilkovsky,

Chaoyang Wang,

Jian Wang,

Narendra Ahuja,

Bing Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Hao and Luo, Jiahao and Wan, Bohui and Zhao, Yizhou and Li, Zongrui and Vasilkovsky, Michael and Wang, Chaoyang and Wang, Jian and Ahuja, Narendra and Zhou, Bing},
  title     = {{RigMo}: Unifying Rig and Motion Learning for Generative Animation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25438--25449},
}
TokenLight: Precise Lighting Control in Images using Attribute Tokens: Sumit Chaturvedi,

Yannick Hold-Geoffroy,

Mengwei Ren,

Jingyuan Liu,

He Zhang,

Yiqun Mei,

Julie Dorsey,

Zhixin Shu; [pdf] [supp]
[bibtex]
@InProceedings{Chaturvedi_2026_CVPR,
  author    = {Chaturvedi, Sumit and Hold-Geoffroy, Yannick and Ren, Mengwei and Liu, Jingyuan and Zhang, He and Mei, Yiqun and Dorsey, Julie and Shu, Zhixin},
  title     = {{TokenLight}: Precise Lighting Control in Images using Attribute Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19737--19748},
}
Linking Modality Isolation in Heterogeneous Collaborative Perception: Changxing Liu,

Zichen Chao,

Siheng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Changxing and Chao, Zichen and Chen, Siheng},
  title     = {Linking Modality Isolation in Heterogeneous Collaborative Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39765--39774},
}
UltraFlux: Data-Model Co-Design for High-quality Native 4K Text-to-Image Generation across Diverse Aspect Ratios: Tian Ye,

Song Fei,

Lei Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Tian and Fei, Song and Zhu, Lei},
  title     = {{UltraFlux}: Data-Model Co-Design for High-quality Native {4K} Text-to-Image Generation across Diverse Aspect Ratios},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22070--22079},
}
InverFill: One-Step Inversion for Enhanced Few-Step Diffusion Inpainting: Duc Vu,

Kien Nguyen,

Trong-Tung Nguyen,

Ngan Nguyen,

Phong Nguyen,

Khoi Nguyen,

Cuong Pham,

Anh Tran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vu_2026_CVPR,
  author    = {Vu, Duc and Nguyen, Kien and Nguyen, Trong-Tung and Nguyen, Ngan and Nguyen, Phong and Nguyen, Khoi and Pham, Cuong and Tran, Anh},
  title     = {{InverFill}: One-Step Inversion for Enhanced Few-Step Diffusion Inpainting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25677--25687},
}
Expand and Prune: Maximizing Trajectory Diversity for Effective GRPO in Generative Models: Shiran Ge,

Chenyi Huang,

Yuang Ai,

Qihang Fan,

Huaibo Huang,

Ran He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2026_CVPR,
  author    = {Ge, Shiran and Huang, Chenyi and Ai, Yuang and Fan, Qihang and Huang, Huaibo and He, Ran},
  title     = {Expand and Prune: Maximizing Trajectory Diversity for Effective {GRPO} in Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41913--41922},
}
Disentangled Textual Priors for Diffusion-based Image Super-Resolution: Lei Jiang,

Xin Liu,

Xinze Tong,

Zhiliang Li,

Jie Liu,

Jie Tang,

Gangshan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Lei and Liu, Xin and Tong, Xinze and Li, Zhiliang and Liu, Jie and Tang, Jie and Wu, Gangshan},
  title     = {Disentangled Textual Priors for Diffusion-based Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38228--38237},
}
NG-GS: NeRF-guided 3D Gaussian Splatting Segmentation: Yi He,

Tao Wang,

Yi Jin,

Congyan Lang,

Yidong Li,

Haibin Ling; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yi and Wang, Tao and Jin, Yi and Lang, Congyan and Li, Yidong and Ling, Haibin},
  title     = {{NG-GS}: {NeRF}-guided {3D} {Gaussian} Splatting Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42061--42070},
}
HandWorld: Hand-Centric Unified Video Action Generation: Zhihao Sun,

Zhiying Du,

Xitong Yang,

Zuxuan Wu; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Zhihao and Du, Zhiying and Yang, Xitong and Wu, Zuxuan},
  title     = {{HandWorld}: Hand-Centric Unified Video Action Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15976--15985},
}
Training-Only Heterogeneous Image-Patch-Text Graph Supervision for Advancing Few-Shot Learning Adapters: Mohammed Rahman Sherif Khan Mohammad,

Ardhendu Behera,

Sandip Pradhan,

Swagat Kumar,

Amr Ahmed; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mohammad_2026_CVPR,
  author    = {Mohammad, Mohammed Rahman Sherif Khan and Behera, Ardhendu and Pradhan, Sandip and Kumar, Swagat and Ahmed, Amr},
  title     = {Training-Only Heterogeneous Image-Patch-Text Graph Supervision for Advancing Few-Shot Learning Adapters},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19613--19622},
}
Animator-Centric Skeleton Generation on Objects with Fine-Grained Details: Mingze Sun,

Cheng Zeng,

Jiansong Pei,

Junhao Chen,

Chaoyue Song,

Shaohui Wang,

Tianyuan Chang,

Bin Huang,

Zijiao Zeng,

Ruqi Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Mingze and Zeng, Cheng and Pei, Jiansong and Chen, Junhao and Song, Chaoyue and Wang, Shaohui and Chang, Tianyuan and Huang, Bin and Zeng, Zijiao and Huang, Ruqi},
  title     = {Animator-Centric Skeleton Generation on Objects with Fine-Grained Details},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17336--17345},
}
ROSE: Rotate Your Large Language Model to See: Tongtian Yue,

Xuange Gao,

Longteng Guo,

Zijia Zhao,

Zikang Liu,

Jie Jiang,

Hua Huang,

Jing Liu; [pdf] [supp]
[bibtex]
@InProceedings{Yue_2026_CVPR,
  author    = {Yue, Tongtian and Gao, Xuange and Guo, Longteng and Zhao, Zijia and Liu, Zikang and Jiang, Jie and Huang, Hua and Liu, Jing},
  title     = {{ROSE}: Rotate Your Large Language Model to See},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19265--19275},
}
Interactive Tracking: A Human-in-the-Loop Paradigm with Memory-Augmented Adaptation: Yuqing Huang,

Guotian Zeng,

Zhenqiao Yuan,

Zhenyu He,

Xin Li,

Yaowei Wang,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Yuqing and Zeng, Guotian and Yuan, Zhenqiao and He, Zhenyu and Li, Xin and Wang, Yaowei and Yang, Ming-Hsuan},
  title     = {Interactive Tracking: A Human-in-the-Loop Paradigm with Memory-Augmented Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35166--35176},
}
TokenHand: Discrete Token Representation for Efficient Hand Mesh Reconstruction: Xinguo He,

Yixin Shen,

Rahul Chaudhari; [pdf]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Xinguo and Shen, Yixin and Chaudhari, Rahul},
  title     = {{TokenHand}: Discrete Token Representation for Efficient Hand Mesh Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8921--8931},
}
VAR RL Done Right: Tackling Asynchronous Policy Conflicts in Visual Autoregressive Generation: Shikun Sun,

Liao Qu,

Huichao Zhang,

Yiheng Liu,

Yangyang Song,

Xian Li,

Yi Jiang,

Xu Wang,

Jia Jia,

Daniel K. Du,

Xinglong Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Shikun and Qu, Liao and Zhang, Huichao and Liu, Yiheng and Song, Yangyang and Li, Xian and Jiang, Yi and Wang, Xu and Jia, Jia and Du, Daniel K. and Wu, Xinglong},
  title     = {{VAR} {RL} Done Right: Tackling Asynchronous Policy Conflicts in Visual Autoregressive Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1874--1884},
}
Beyond Myopic Alignment: Lookahead Optimization for Online Class-Incremental Learning: Song Lai,

Zhe Zhao,

Fei Zhu,

Ji Cheng,

Xi Lin,

Qingfu Zhang,

Gaofeng Meng; [pdf] [supp]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Song and Zhao, Zhe and Zhu, Fei and Cheng, Ji and Lin, Xi and Zhang, Qingfu and Meng, Gaofeng},
  title     = {Beyond Myopic Alignment: Lookahead Optimization for Online Class-Incremental Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18053--18062},
}
Degradation-Robust Fusion: An Efficient Degradation-Aware Diffusion Framework for Multimodal Image Fusion in Arbitrary Degradation Scenarios: Yu Shi,

Yu Liu,

Zhong-Cheng Wu,

Juan Cheng,

Huafeng Li,

Xun Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Yu and Liu, Yu and Wu, Zhong-Cheng and Cheng, Juan and Li, Huafeng and Chen, Xun},
  title     = {Degradation-Robust Fusion: An Efficient Degradation-Aware Diffusion Framework for Multimodal Image Fusion in Arbitrary Degradation Scenarios},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33848--33858},
}
Endless World: Real-Time 3D-Aware Long Video Generation: Ke Zhang,

Jiacong Xu,

Yiqun Mei,

Vishal M. Patel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Ke and Xu, Jiacong and Mei, Yiqun and Patel, Vishal M.},
  title     = {Endless World: Real-Time {3D}-Aware Long Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18386--18395},
}
Advancing Cancer Prognosis with Hierarchical Fusion of Genomic, Proteomic and Pathology Imaging Data from a Systems Biology Perspective: Junjie Zhou,

Bao Xue,

Meiling Wang,

Wei Shao,

Daoqiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Junjie and Xue, Bao and Wang, Meiling and Shao, Wei and Zhang, Daoqiang},
  title     = {Advancing Cancer Prognosis with Hierarchical Fusion of Genomic, Proteomic and Pathology Imaging Data from a Systems Biology Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12554--12564},
}
RAISE: Requirement-Adaptive Evolutionary Refinement for Training-Free Text-to-Image Alignment: Liyao Jiang,

Ruichen Chen,

Chao Gao,

Di Niu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Liyao and Chen, Ruichen and Gao, Chao and Niu, Di},
  title     = {{RAISE}: Requirement-Adaptive Evolutionary Refinement for Training-Free Text-to-Image Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22038--22048},
}
Hoi! - A Multimodal Dataset for Force-Grounded, Cross-View Articulated Manipulation: Tim Engelbracht,

René Zurbrügg,

Matteo Wohlrapp,

Martin Büchner,

Abhinav Valada,

Marc Pollefeys,

Hermann Blum,

Zuria Bauer; [pdf] [supp]
[bibtex]
@InProceedings{Engelbracht_2026_CVPR,
  author    = {Engelbracht, Tim and Zurbr{\"u}gg, Ren{\'e} and Wohlrapp, Matteo and B{\"u}chner, Martin and Valada, Abhinav and Pollefeys, Marc and Blum, Hermann and Bauer, Zuria},
  title     = {Hoi! - A Multimodal Dataset for Force-Grounded, Cross-View Articulated Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8880--8890},
}
4DSurf: High-Fidelity Dynamic Scene Surface Reconstruction: Renjie Wu,

Hongdong Li,

Jose M. Alvarez,

Miaomiao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Renjie and Li, Hongdong and Alvarez, Jose M. and Liu, Miaomiao},
  title     = {{4DSurf}: High-Fidelity Dynamic Scene Surface Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22539--22549},
}
MultiBanana: A Challenging Benchmark for Multi-Reference Text-to-Image Generation: Yuta Oshima,

Daiki Miyake,

Kohsei Matsutani,

Yusuke Iwasawa,

Masahiro Suzuki,

Yutaka Matsuo,

Hiroki Furuta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Oshima_2026_CVPR,
  author    = {Oshima, Yuta and Miyake, Daiki and Matsutani, Kohsei and Iwasawa, Yusuke and Suzuki, Masahiro and Matsuo, Yutaka and Furuta, Hiroki},
  title     = {{MultiBanana}: A Challenging Benchmark for Multi-Reference Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {448--460},
}
Distilling Unsigned Distance Function for Surface Reconstruction from 3D Gaussian Splatting: Qian Li,

Rao Fu,

Jiangtao Li,

Fan Liu; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Qian and Fu, Rao and Li, Jiangtao and Liu, Fan},
  title     = {Distilling Unsigned Distance Function for Surface Reconstruction from {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4891--4901},
}
TUDSR: Twice Upsampling-Diffusion for Higher Super-Resolution: Zhiqiang Wu,

Yitong Dong,

Xian Wei; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zhiqiang and Dong, Yitong and Wei, Xian},
  title     = {{TUDSR}: Twice Upsampling-Diffusion for Higher Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38208--38217},
}
TriLite: Efficient Weakly Supervised Object Localization with Universal Visual Features and Tri-Region Disentanglement: Arian Sabaghi,

Jose Oramas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sabaghi_2026_CVPR,
  author    = {Sabaghi, Arian and Oramas, Jose},
  title     = {{TriLite}: Efficient Weakly Supervised Object Localization with Universal Visual Features and Tri-Region Disentanglement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41386--41395},
}
VLM-3R: Vision-Language Models Augmented with Instruction-Aligned 3D Reconstruction: Zhiwen Fan,

Jian Zhang,

Renjie Li,

Junge Zhang,

Runjin Chen,

Hezhen Hu,

Kevin Wang,

Peihao Wang,

Huaizhi Qu,

Shijie Zhou,

Dilin Wang,

Zhicheng Yan,

Hongyu Xu,

Justin Theiss,

Tianlong Chen,

Jiachen Li,

Zhengzhong Tu,

Zhangyang Wang,

Rakesh Ranjan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Zhiwen and Zhang, Jian and Li, Renjie and Zhang, Junge and Chen, Runjin and Hu, Hezhen and Wang, Kevin and Wang, Peihao and Qu, Huaizhi and Zhou, Shijie and Wang, Dilin and Yan, Zhicheng and Xu, Hongyu and Theiss, Justin and Chen, Tianlong and Li, Jiachen and Tu, Zhengzhong and Wang, Zhangyang and Ranjan, Rakesh},
  title     = {{VLM-3R}: Vision-Language Models Augmented with Instruction-Aligned {3D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31054--31065},
}
Pixel Motion Diffusion is What We Need for Robot Control: E-Ro Nguyen,

Yichi Zhang,

Kanchana Ranasinghe,

Xiang Li,

Michael S. Ryoo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, E-Ro and Zhang, Yichi and Ranasinghe, Kanchana and Li, Xiang and Ryoo, Michael S.},
  title     = {Pixel Motion Diffusion is What We Need for Robot Control},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23663--23672},
}
TouchDream: 3D Object Completion through Imagined Touch: Yuanbo Wang,

Xinning Wang,

Zhaoxuan Zhang,

Changlong Wang,

Qianchen Xia,

Xiaopeng Wei,

Xin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuanbo and Wang, Xinning and Zhang, Zhaoxuan and Wang, Changlong and Xia, Qianchen and Wei, Xiaopeng and Yang, Xin},
  title     = {{TouchDream}: {3D} Object Completion through Imagined Touch},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8901--8910},
}
Sparsely Timing the Change: A Spiking Temporal Framework for Remote Sensing Interpretation: Shilong Li,

Xiurui Xie,

Qiugang Zhan,

Luochao Wang,

Yong Deng,

Guisong Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Shilong and Xie, Xiurui and Zhan, Qiugang and Wang, Luochao and Deng, Yong and Liu, Guisong},
  title     = {Sparsely Timing the Change: A Spiking Temporal Framework for Remote Sensing Interpretation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34575--34585},
}
FreeForm: Reduced-Order Deformable Simulation from Particle-Based Skinning Eigenmodes: Donglai Xiang,

Vismay Modi,

Rishit Dagli,

Ty Trusty,

Gilles Daviet,

Anka He Chen,

Nicholas Sharp,

David I.W. Levin; [pdf] [supp]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Donglai and Modi, Vismay and Dagli, Rishit and Trusty, Ty and Daviet, Gilles and Chen, Anka He and Sharp, Nicholas and Levin, David I. W.},
  title     = {{FreeForm}: Reduced-Order Deformable Simulation from Particle-Based Skinning Eigenmodes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32475--32484},
}
RAW-Domain Degradation Models for Realistic Smartphone Super-Resolution: Ali Mosleh,

Faraz Ali,

Fengjia Zhang,

Stavros Tsogkas,

Junyong Lee,

Michael S. Brown,

Alex Levinshtein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mosleh_2026_CVPR,
  author    = {Mosleh, Ali and Ali, Faraz and Zhang, Fengjia and Tsogkas, Stavros and Lee, Junyong and Brown, Michael S. and Levinshtein, Alex},
  title     = {{RAW}-Domain Degradation Models for Realistic Smartphone Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23430--23439},
}
Real-Time Long Horizon Air Quality Forecasting via Group-Relative Policy Optimization: Inha Kang,

Eunki Kim,

Wonjeong Ryu,

Jaeyo Shin,

Seungjun Yu,

Yoon-Hee Kang,

Seongeun Jeong,

Eunhye Kim,

Soontae Kim,

Hyunjung Shim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Inha and Kim, Eunki and Ryu, Wonjeong and Shin, Jaeyo and Yu, Seungjun and Kang, Yoon-Hee and Jeong, Seongeun and Kim, Eunhye and Kim, Soontae and Shim, Hyunjung},
  title     = {Real-Time Long Horizon Air Quality Forecasting via Group-Relative Policy Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6421--6431},
}
LEAD: Minimizing Learner-Expert Asymmetry in End-to-End Driving: Long Nguyen,

Micha Fauth,

Bernhard Jaeger,

Daniel Dauner,

Maximilian Igl,

Andreas Geiger,

Kashyap Chitta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Long and Fauth, Micha and Jaeger, Bernhard and Dauner, Daniel and Igl, Maximilian and Geiger, Andreas and Chitta, Kashyap},
  title     = {{LEAD}: Minimizing Learner-Expert Asymmetry in End-to-End Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39775--39785},
}
PromptLoop: Plug-and-Play Prompt Refinement via Latent Feedback for Diffusion Model Alignment: Suhyeon Lee,

Jong Chul Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Suhyeon and Ye, Jong Chul},
  title     = {{PromptLoop}: Plug-and-Play Prompt Refinement via Latent Feedback for Diffusion Model Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41859--41869},
}
DIMOS: Disentangling Instance-level Moving Object Segmentation: Hongxiang Huang,

Hongwei Ren,

Xiaopeng Lin,

Yulong Huang,

Zeke Xie,

Bojun Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Hongxiang and Ren, Hongwei and Lin, Xiaopeng and Huang, Yulong and Xie, Zeke and Cheng, Bojun},
  title     = {{DIMOS}: Disentangling Instance-level Moving Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39806--39816},
}
PrITTI: Primitive-based Generation of Controllable and Editable 3D Semantic Urban Scenes: Christina Ourania Tze,

Daniel Dauner,

Yiyi Liao,

Dzmitry Tsishkou,

Andreas Geiger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tze_2026_CVPR,
  author    = {Tze, Christina Ourania and Dauner, Daniel and Liao, Yiyi and Tsishkou, Dzmitry and Geiger, Andreas},
  title     = {{PrITTI}: Primitive-based Generation of Controllable and Editable {3D} Semantic Urban Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32613--32624},
}
DSCA: Dynamic Subspace Concept Alignment for Lifelong VLM Editing: Gyanendra Das,

Sai Jena; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Das_2026_CVPR,
  author    = {Das, Gyanendra and Jena, Sai},
  title     = {{DSCA}: Dynamic Subspace Concept Alignment for Lifelong {VLM} Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40772--40781},
}
BeautyGRPO: Aesthetic Alignment for Face Retouching via Dynamic Path Guidance and Fine-Grained Preference Modeling: Jiachen Yang,

Xianhui Lin,

Yi Dong,

Zebiao Zheng,

Xing Liu,

Hong Gu,

Yanmei Fang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Jiachen and Lin, Xianhui and Dong, Yi and Zheng, Zebiao and Liu, Xing and Gu, Hong and Fang, Yanmei},
  title     = {{BeautyGRPO}: Aesthetic Alignment for Face Retouching via Dynamic Path Guidance and Fine-Grained Preference Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25110--25120},
}
DreamSAC: Learning Hamiltonian World Models via Symmetry Exploration: Jinzhou Tang,

Fan Feng,

Minghao Fu,

Wenjun Lin,

Jing Yang,

Biwei Huang,

Keze Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Jinzhou and Feng, Fan and Fu, Minghao and Lin, Wenjun and Yang, Jing and Huang, Biwei and Wang, Keze},
  title     = {{DreamSAC}: Learning {Hamiltonian} World Models via Symmetry Exploration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15188--15198},
}
BiGain: Unified Token Compression for Joint Generation and Classification: Jiacheng Liu,

Shengkun Tang,

Jiacheng Cui,

Dongkuan Xu,

Zhiqiang Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jiacheng and Tang, Shengkun and Cui, Jiacheng and Xu, Dongkuan and Shen, Zhiqiang},
  title     = {{BiGain}: Unified Token Compression for Joint Generation and Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31931--31940},
}
Say Cheese! Detail-Preserving Portrait Collection Generation via Natural Language Edits: Zelong Sun,

Jiahui Wu,

Ying Ba,

Dong Jing,

Zhiwu Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Zelong and Wu, Jiahui and Ba, Ying and Jing, Dong and Lu, Zhiwu},
  title     = {Say Cheese! Detail-Preserving Portrait Collection Generation via Natural Language Edits},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7827--7836},
}
Resolving Evidence Sparsity: Agentic Context Engineering for Long-Document Understanding: Keliang Liu,

Zizhi Chen,

Mingcheng Li,

Jingqun Tang,

Dingkang Yang,

Lihua Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Keliang and Chen, Zizhi and Li, Mingcheng and Tang, Jingqun and Yang, Dingkang and Zhang, Lihua},
  title     = {Resolving Evidence Sparsity: Agentic Context Engineering for Long-Document Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19452--19462},
}
4DP-QA: Scalable QA for 4D Perception in Vision Language Models: Seokju Cho,

Abhishek Badki,

Hang Su,

Jindong Jiang,

Ziyao Zeng,

Seungryong Kim,

Sifei Liu,

Orazio Gallo; [pdf] [supp]
[bibtex]
@InProceedings{Cho_2026_CVPR,
  author    = {Cho, Seokju and Badki, Abhishek and Su, Hang and Jiang, Jindong and Zeng, Ziyao and Kim, Seungryong and Liu, Sifei and Gallo, Orazio},
  title     = {{4DP-QA}: Scalable {QA} for {4D} Perception in Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23869--23879},
}
Neighbor-Aware Localized Concept Erasure in Text-to-Image Diffusion Models: Zhuan Shi,

Alireza Dehghanpour Farashah,

Rik de Vries,

Golnoosh Farnadi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Zhuan and Farashah, Alireza Dehghanpour and de Vries, Rik and Farnadi, Golnoosh},
  title     = {Neighbor-Aware Localized Concept Erasure in Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17440--17450},
}
HAWK: Head Importance-Aware Visual Token Pruning in Multimodal Models: Qihui Zhu,

Tao Zhang,

Yuchen Wang,

Shuangwu Chen,

Xiaobin Tan,

Jian Yang,

Yang Liu,

Yinfei Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Qihui and Zhang, Tao and Wang, Yuchen and Chen, Shuangwu and Tan, Xiaobin and Yang, Jian and Liu, Yang and Pan, Yinfei},
  title     = {{HAWK}: Head Importance-Aware Visual Token Pruning in Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39583--39592},
}
DarkShake-DVS: Event-based Human Action Recognition under Low-light and Shaking Camera Conditions: Jiaqi Chen,

Qinfu Xu,

Liyuan Pan; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jiaqi and Xu, Qinfu and Pan, Liyuan},
  title     = {{DarkShake-DVS}: Event-based Human Action Recognition under Low-light and Shaking Camera Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20149--20159},
}
EthoCLIP: Ontology-Enhanced Video-Language Pretraining for Animal Behavior Understanding: Yinuo Jing,

Jinyan Wu,

Zixi Yang,

Kongming Liang,

Xiatian Zhu,

Zhanyu Ma; [pdf] [supp]
[bibtex]
@InProceedings{Jing_2026_CVPR,
  author    = {Jing, Yinuo and Wu, Jinyan and Yang, Zixi and Liang, Kongming and Zhu, Xiatian and Ma, Zhanyu},
  title     = {{EthoCLIP}: Ontology-Enhanced Video-Language Pretraining for Animal Behavior Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31196--31206},
}
TriSim: Tri-Dimensional Similarity Modeling with Extreme Value Theory for False-Negative Mitigation in Remote Sensing Image-Text Retrieval: Chengyu Zheng,

Hanzhang Lu,

Jie Nie,

Shan Du; [pdf]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Chengyu and Lu, Hanzhang and Nie, Jie and Du, Shan},
  title     = {{TriSim}: Tri-Dimensional Similarity Modeling with Extreme Value Theory for False-Negative Mitigation in Remote Sensing Image-Text Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23944--23954},
}
MapReduce LoRA: Advancing the Pareto Front in Multi-Preference Optimization for Generative Models: Chieh-Yun Chen,

Zhonghao Wang,

Qi Chen,

Zhifan Ye,

Min Shi,

Yue Zhao,

Yinan Zhao,

Hui Qu,

Wei-An Lin,

Yiru Shen,

Ajinkya Kale,

Irfan Essa,

Humphrey Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Chieh-Yun and Wang, Zhonghao and Chen, Qi and Ye, Zhifan and Shi, Min and Zhao, Yue and Zhao, Yinan and Qu, Hui and Lin, Wei-An and Shen, Yiru and Kale, Ajinkya and Essa, Irfan and Shi, Humphrey},
  title     = {{MapReduce} {LoRA}: Advancing the {Pareto} Front in Multi-Preference Optimization for Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34375--34384},
}
FedSST: Rethinking Fair Federated Graph Learning under Structural Shift: Dingyi Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Dingyi},
  title     = {{FedSST}: Rethinking Fair Federated Graph Learning under Structural Shift},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10335--10345},
}
Beyond Perceptual Shortcuts: Causal-Inspired Debiasing Optimization for Generalizable Video Reasoning in Lightweight MLLMs: Jingze Wu,

Quan Zhang,

Hongfei Suo,

Zeqiang Cai,

Hongbo Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Jingze and Zhang, Quan and Suo, Hongfei and Cai, Zeqiang and Chen, Hongbo},
  title     = {Beyond Perceptual Shortcuts: Causal-Inspired Debiasing Optimization for Generalizable Video Reasoning in Lightweight {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12259--12268},
}
Self-Consistency for LLM-Based Motion Trajectory Generation and Verification: Jiaju Ma,

R. Kenny Jones,

Jiajun Wu,

Maneesh Agrawala; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Jiaju and Jones, R. Kenny and Wu, Jiajun and Agrawala, Maneesh},
  title     = {Self-Consistency for {LLM}-Based Motion Trajectory Generation and Verification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17357--17366},
}
Agentic Retoucher for Text-To-Image Generation: Shaocheng Shen,

Jianfeng Liang,

Chunlei Cai,

Cong Geng,

Huiyu Duan,

Xiaoyun Zhang,

Qiang Hu,

Guangtao Zhai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Shaocheng and Liang, Jianfeng and Cai, Chunlei and Geng, Cong and Duan, Huiyu and Zhang, Xiaoyun and Hu, Qiang and Zhai, Guangtao},
  title     = {Agentic Retoucher for Text-To-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29114--29125},
}
Scalable Trajectory Generation for Whole-Body Mobile Manipulation: Yida Niu,

Xinhai Chang,

Xin Liu,

Ziyuan Jiao,

Yixin Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Niu_2026_CVPR,
  author    = {Niu, Yida and Chang, Xinhai and Liu, Xin and Jiao, Ziyuan and Zhu, Yixin},
  title     = {Scalable Trajectory Generation for Whole-Body Mobile Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1798--1808},
}
UTPTrack: Towards Simple and Unified Token Pruning for Visual Tracking: Hao Wu,

Xudong Wang,

Jialiang Zhang,

Junlong Tong,

Xinghao Chen,

Junyan Lin,

Yunpu Ma,

Xiaoyu Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Hao and Wang, Xudong and Zhang, Jialiang and Tong, Junlong and Chen, Xinghao and Lin, Junyan and Ma, Yunpu and Shen, Xiaoyu},
  title     = {{UTPTrack}: Towards Simple and Unified Token Pruning for Visual Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20963--20972},
}
UniDef: Universal Defense Against Unauthorized Image Manipulation: Mingwen Shao,

Lingzhuang Meng,

Xiang Lv,

Mengyao Wu,

Xinyuan Chen,

Qiao Zhang,

Chang Liu,

Yuanjian Qiao,

Chao Dong; [pdf] [supp]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Mingwen and Meng, Lingzhuang and Lv, Xiang and Wu, Mengyao and Chen, Xinyuan and Zhang, Qiao and Liu, Chang and Qiao, Yuanjian and Dong, Chao},
  title     = {{UniDef}: Universal Defense Against Unauthorized Image Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8631--8640},
}
CLCR: Cross-Level Semantic Collaborative Representation for Multimodal Learning: Chunlei Meng,

Guanhong Huang,

Rong Fu,

Runmin Jian,

Zhongxue Gan,

Chun Ouyang; [pdf] [arXiv]
[bibtex]
@InProceedings{Meng_2026_CVPR,
  author    = {Meng, Chunlei and Huang, Guanhong and Fu, Rong and Jian, Runmin and Gan, Zhongxue and Ouyang, Chun},
  title     = {{CLCR}: Cross-Level Semantic Collaborative Representation for Multimodal Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1606--1615},
}
Optical Diffraction-based Convolution for Semiconductor Lithography: Young-Han Son,

Dong-Hee Shin,

Deok-Joong Lee,

Hyun Jung Lee,

Tae-Eui Kam; [pdf] [supp]
[bibtex]
@InProceedings{Son_2026_CVPR,
  author    = {Son, Young-Han and Shin, Dong-Hee and Lee, Deok-Joong and Lee, Hyun Jung and Kam, Tae-Eui},
  title     = {Optical Diffraction-based Convolution for Semiconductor Lithography},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12458--12468},
}
DASH: A Meta-Attack Framework for Synthesizing Effective and Stealthy Adversarial Examples: Abdullah Al Nomaan Nafi,

Habibur Rahaman,

Zafaryab Haider,

Tanzim Mahfuz,

Fnu Suya,

Swarup Bhunia,

Prabuddha Chakraborty; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Al_Nomaan_Nafi_2026_CVPR,
  author    = {Al Nomaan Nafi, Abdullah and Rahaman, Habibur and Haider, Zafaryab and Mahfuz, Tanzim and Suya, Fnu and Bhunia, Swarup and Chakraborty, Prabuddha},
  title     = {{DASH}: A Meta-Attack Framework for Synthesizing Effective and Stealthy Adversarial Examples},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27884--27893},
}
Sensor2Sensor: Cross-Embodiment Sensor Conversion for Autonomous Driving: Jiahao Wang,

Bo Sun,

Yijing Bai,

Vincent Casser,

Songyou Peng,

Zehao Zhu,

Meng-Li Shih,

Xander Masotto,

Shih-Yang Su,

Kanaad Parvate,

Tiancheng Ge,

Linn Bieske,

Dragomir Anguelov,

Mingxing Tan,

Chiyu Max Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jiahao and Sun, Bo and Bai, Yijing and Casser, Vincent and Peng, Songyou and Zhu, Zehao and Shih, Meng-Li and Masotto, Xander and Su, Shih-Yang and Parvate, Kanaad and Ge, Tiancheng and Bieske, Linn and Anguelov, Dragomir and Tan, Mingxing and Jiang, Chiyu Max},
  title     = {{Sensor2Sensor}: Cross-Embodiment Sensor Conversion for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32093--32102},
}
GSNR: Graph Smooth Null-Space Representation for Inverse Problems: Romario Gualdrón-Hurtado,

Roman Jacome,

Rafael S. Suárez,

Henry Arguello; [pdf] [supp]
[bibtex]
@InProceedings{Gualdron-Hurtado_2026_CVPR,
  author    = {Gualdr{\'o}n-Hurtado, Romario and Jacome, Roman and Su{\'a}rez, Rafael S. and Arguello, Henry},
  title     = {{GSNR}: Graph Smooth Null-Space Representation for Inverse Problems},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12469--12479},
}
RL-ScanIQA: Reinforcement-Learned Scanpaths for Blind 360° Image Quality Assessment: Yujia Wang,

Yuyan Li,

Jiuming Liu,

Fang-Lue Zhang,

Xinhu Zheng,

Neil A. Dodgson; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yujia and Li, Yuyan and Liu, Jiuming and Zhang, Fang-Lue and Zheng, Xinhu and Dodgson, Neil A.},
  title     = {{RL-ScanIQA}: Reinforcement-Learned Scanpaths for Blind {$360^{\circ}$} Image Quality Assessment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37401--37412},
}
U-Mind: A Unified Framework for Real-Time Multimodal Interaction with Audiovisual Generation: Xiang Deng,

Feng Gao,

Yong Zhang,

Youxin Pang,

Xu Xiaoming,

Zhuoliang Kang,

Xiaoming Wei,

Yebin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Xiang and Gao, Feng and Zhang, Yong and Pang, Youxin and Xiaoming, Xu and Kang, Zhuoliang and Wei, Xiaoming and Liu, Yebin},
  title     = {{U-Mind}: A Unified Framework for Real-Time Multimodal Interaction with Audiovisual Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10874--10886},
}
ReasonEdit: Towards Reasoning-Enhanced Image Editing Models: Fukun Yin,

Shiyu Liu,

Yucheng Han,

Zhibo Wang,

Peng Xing,

Rui Wang,

Wei Cheng,

Yingming Wang,

Aojie Li,

Zixin Yin,

Pengtao Chen,

Xianfang Zeng,

Gang Yu,

Daxin Jiang; [pdf] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Fukun and Liu, Shiyu and Han, Yucheng and Wang, Zhibo and Xing, Peng and Wang, Rui and Cheng, Wei and Wang, Yingming and Li, Aojie and Yin, Zixin and Chen, Pengtao and Zeng, Xianfang and Yu, Gang and Jiang, Daxin},
  title     = {{ReasonEdit}: Towards Reasoning-Enhanced Image Editing Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23283--23293},
}
PromptEnhancer: Taming Your Rewriter for Text-to-Image Generation via Fine-Grained Reward: Linqing Wang,

Zhiyong Xu,

Ximing Xing,

Yiji Cheng,

Zhiyuan Zhao,

Donghao Li,

Tiankai Hang,

Zhenxi Li,

Jiale Tao,

Qixun Wang,

Ruihuang Li,

Comi Chen,

Xin Li,

Mingrui Wu,

Xinchi Deng,

Shuyang Gu,

Chunyu Wang,

Qinglin Lu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Linqing and Xu, Zhiyong and Xing, Ximing and Cheng, Yiji and Zhao, Zhiyuan and Li, Donghao and Hang, Tiankai and Li, Zhenxi and Tao, Jiale and Wang, Qixun and Li, Ruihuang and Chen, Comi and Li, Xin and Wu, Mingrui and Deng, Xinchi and Gu, Shuyang and Wang, Chunyu and Lu, Qinglin},
  title     = {{PromptEnhancer}: Taming Your Rewriter for Text-to-Image Generation via Fine-Grained Reward},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14895--14904},
}
Beyond Patches: Global-aware Autoregressive Model for Multimodal Few-Shot Font Generation: Haonan Cai,

Yuxuan Luo,

Zhouhui Lian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Haonan and Luo, Yuxuan and Lian, Zhouhui},
  title     = {Beyond Patches: Global-aware Autoregressive Model for Multimodal Few-Shot Font Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {517--528},
}
FAST: Topology-Aware Frequency-Domain Distribution Matching for Coreset Selection: Jin Cui,

Boran Zhao,

Jiajun Xu,

Jiaqi Guo,

Shuo Guan,

Pengju Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Jin and Zhao, Boran and Xu, Jiajun and Guo, Jiaqi and Guan, Shuo and Ren, Pengju},
  title     = {{FAST}: Topology-Aware Frequency-Domain Distribution Matching for Coreset Selection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24748--24758},
}
Clay-to-Stone: Phase-wise 3D Gaussian Splatting for Monocular Articulated Hand-Object Manipulation Modeling: Xingyu Liu,

Pengfei Ren,

Qi Qi,

Haifeng Sun,

Zirui Zhuang,

Jianxin Liao,

Jingyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xingyu and Ren, Pengfei and Qi, Qi and Sun, Haifeng and Zhuang, Zirui and Liao, Jianxin and Wang, Jingyu},
  title     = {Clay-to-Stone: Phase-wise {3D} {Gaussian} Splatting for Monocular Articulated Hand-Object Manipulation Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23128--23138},
}
eRetinexGS: Retinex Modeling for Low-Light Scene Enhancement via Event Streams and 3D Gaussian Splatting: Haojie Yan,

Zehao Chen,

Yan Liu,

Shi Gu,

Peng Lin,

De Ma,

Huajin Tang,

Qian Zheng,

Gang Pan; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Haojie and Chen, Zehao and Liu, Yan and Gu, Shi and Lin, Peng and Ma, De and Tang, Huajin and Zheng, Qian and Pan, Gang},
  title     = {{eRetinexGS}: {Retinex} Modeling for Low-Light Scene Enhancement via Event Streams and {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8056--8066},
}
WebGym: Scaling Training Environments for Long-Horizon Visual Web Agents with Realistic Tasks: Hao Bai,

Alexey Taymanov,

Tong Zhang,

Aviral Kumar,

Spencer Whitehead; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Hao and Taymanov, Alexey and Zhang, Tong and Kumar, Aviral and Whitehead, Spencer},
  title     = {{WebGym}: Scaling Training Environments for Long-Horizon Visual Web Agents with Realistic Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12248--12258},
}
Test-Time Multi-Prompt Adaptation for Open-Vocabulary Remote Sensing Image Segmentation: Ting Yang,

Qilong Wang,

Qibin Hou,

Qinghua Hu; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Ting and Wang, Qilong and Hou, Qibin and Hu, Qinghua},
  title     = {Test-Time Multi-Prompt Adaptation for Open-Vocabulary Remote Sensing Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10699--10709},
}
The Midas Touch for Metric Depth: Yu Ma,

Zizhan Guo,

Zuyi Xiong,

Haoran Zhang,

Yi Feng,

Hongbo Zhao,

Hanli Wang,

Rui Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Yu and Guo, Zizhan and Xiong, Zuyi and Zhang, Haoran and Feng, Yi and Zhao, Hongbo and Wang, Hanli and Fan, Rui},
  title     = {The {Midas} Touch for Metric Depth},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5804--5813},
}
Wavelet-Driven 3D Anomaly Detection under Pose-Agnostic and Sparse-View: Mingwen Shao,

Qiao Zhang,

Xinyuan Chen,

Xiang Lv,

Lingzhuang Meng,

Chang Liu,

Qinglin Zhan,

Ling Jian; [pdf] [supp]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Mingwen and Zhang, Qiao and Chen, Xinyuan and Lv, Xiang and Meng, Lingzhuang and Liu, Chang and Zhan, Qinglin and Jian, Ling},
  title     = {Wavelet-Driven {3D} Anomaly Detection under Pose-Agnostic and Sparse-View},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43083--43092},
}
OmniLottie: Generating Vector Animations via Parameterized Lottie Tokens: Yiying Yang,

Wei Cheng,

Sijin Chen,

Honghao Fu,

Xianfang Zeng,

Yujun Cai,

Gang Yu,

Xingjun Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yiying and Cheng, Wei and Chen, Sijin and Fu, Honghao and Zeng, Xianfang and Cai, Yujun and Yu, Gang and Ma, Xingjun},
  title     = {{OmniLottie}: Generating Vector Animations via Parameterized {Lottie} Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39293--39303},
}
GeneVAR: Causal MeanFlow for Autoregressive Gene-to-WSI Tile Synthesis: Jianwei Zhao,

Fan Yang,

Xin Li,

Qiang Zhai,

Ao Luo,

Ziqi Ren,

Zhicheng Jiao,

Hong Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Jianwei and Yang, Fan and Li, Xin and Zhai, Qiang and Luo, Ao and Ren, Ziqi and Jiao, Zhicheng and Cheng, Hong},
  title     = {{GeneVAR}: Causal {MeanFlow} for Autoregressive Gene-to-{WSI} Tile Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34116--34125},
}
Narrative Weaver: Towards Controllable Long-Range Visual Consistency with Multi-Modal Conditioning: Zhengjian Yao,

Yongzhi Li,

Xinyuan Gao,

Quan Chen,

Peng Jiang,

Yanye Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2026_CVPR,
  author    = {Yao, Zhengjian and Li, Yongzhi and Gao, Xinyuan and Chen, Quan and Jiang, Peng and Lu, Yanye},
  title     = {Narrative Weaver: Towards Controllable Long-Range Visual Consistency with Multi-Modal Conditioning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7707--7718},
}
Bilevel Layer-Positioning LoRA for Real Image Dehazing: Yan Zhang,

Long Ma,

Yuxin Feng,

Zhe Huang,

Fan Zhou,

Zhuo Su; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yan and Ma, Long and Feng, Yuxin and Huang, Zhe and Zhou, Fan and Su, Zhuo},
  title     = {Bilevel Layer-Positioning {LoRA} for Real Image Dehazing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29969--29978},
}
TrajTok: Learning Trajectory Tokens Enhances Video Understanding: Chenhao Zheng,

Jieyu Zhang,

Jianing Zhang,

Weikai Huang,

Ashutosh Kumar,

Quan Kong,

Oncel Tuzel,

Chun-Liang Li,

Ranjay Krishna; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Chenhao and Zhang, Jieyu and Zhang, Jianing and Huang, Weikai and Kumar, Ashutosh and Kong, Quan and Tuzel, Oncel and Li, Chun-Liang and Krishna, Ranjay},
  title     = {{TrajTok}: Learning Trajectory Tokens Enhances Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31207--31218},
}
3D-VCD: Hallucination Mitigation in 3D-LLM Embodied Agents through Visual Contrastive Decoding: Makanjuola Adekunmi Ogunleye,

Eman Abdelrahman,

Ismini Lourentzou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ogunleye_2026_CVPR,
  author    = {Ogunleye, Makanjuola Adekunmi and Abdelrahman, Eman and Lourentzou, Ismini},
  title     = {{3D-VCD}: Hallucination Mitigation in {3D-LLM} Embodied Agents through Visual Contrastive Decoding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40197--40207},
}
SAT-RRG: LLM-Guided Self-Adaptive Training for Radiology Report Generation with Token-Level Push-Pull Optimization: Yunyi Liu,

Yingshu Li,

Tong Chen,

Lingqiao Liu,

Lei Wang,

Luping Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yunyi and Li, Yingshu and Chen, Tong and Liu, Lingqiao and Wang, Lei and Zhou, Luping},
  title     = {{SAT-RRG}: {LLM}-Guided Self-Adaptive Training for Radiology Report Generation with Token-Level Push-Pull Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35363--35372},
}
MoRE: 3D Visual Geometry Reconstruction Meets Mixture-of-Experts: Jingnan Gao,

Zhe Wang,

Xianze Fang,

Xingyu Ren,

Zhuo Chen,

Shengqi Liu,

Yuhao Cheng,

Jiangjing Lyu,

Xiaokang Yang,

Yichao Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Jingnan and Wang, Zhe and Fang, Xianze and Ren, Xingyu and Chen, Zhuo and Liu, Shengqi and Cheng, Yuhao and Lyu, Jiangjing and Yang, Xiaokang and Yan, Yichao},
  title     = {{MoRE}: {3D} Visual Geometry Reconstruction Meets Mixture-of-Experts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14680--14691},
}
UniVerse: A Unified Modulation Framework for Segmentation-Free, Disentangled Multi-Concept Personalization: Quynh Phung,

Sandesh Ghimire,

Minsi Hu,

Chung-Chi Tsai,

Jia-Bin Huang; [pdf] [supp]
[bibtex]
@InProceedings{Phung_2026_CVPR,
  author    = {Phung, Quynh and Ghimire, Sandesh and Hu, Minsi and Tsai, Chung-Chi and Huang, Jia-Bin},
  title     = {{UniVerse}: A Unified Modulation Framework for Segmentation-Free, Disentangled Multi-Concept Personalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22007--22016},
}
GeoMotion: Rethinking Motion Segmentation via Latent 4D Geometry: Xiankang He,

Peile Lin,

Ying Cui,

Dongyan Guo,

Chunhua Shen,

Xiaoqin Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Xiankang and Lin, Peile and Cui, Ying and Guo, Dongyan and Shen, Chunhua and Zhang, Xiaoqin},
  title     = {{GeoMotion}: Rethinking Motion Segmentation via Latent {4D} Geometry},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28145--28155},
}
Generative Point Tracking and Forecasting: Xuanchen Lu,

Ang Cao,

Chao Feng,

Andrew Owens; [pdf]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Xuanchen and Cao, Ang and Feng, Chao and Owens, Andrew},
  title     = {Generative Point Tracking and Forecasting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28167--28178},
}
Dual-branch Distilled Transformer for Efficient Asymmetric UAV Tracking: Hongtao Yang,

Bineng Zhong,

Qihua Liang,

Yaozong Zheng,

Xiantao Hu,

Yuanliang Xue,

Shuxiang Song; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Hongtao and Zhong, Bineng and Liang, Qihua and Zheng, Yaozong and Hu, Xiantao and Xue, Yuanliang and Song, Shuxiang},
  title     = {Dual-branch Distilled Transformer for Efficient Asymmetric {UAV} Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13615--13625},
}
DiffSoup: Direct Differentiable Rasterization of Triangle Soup for Extreme Radiance Field Simplification: Kenji Tojo,

Bernd Bickel,

Nobuyuki Umetani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tojo_2026_CVPR,
  author    = {Tojo, Kenji and Bickel, Bernd and Umetani, Nobuyuki},
  title     = {{DiffSoup}: Direct Differentiable Rasterization of Triangle Soup for Extreme Radiance Field Simplification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8353--8363},
}
dMLLM-TTS: Self-Verified and Efficient Test-Time Scaling for Diffusion Multi-Modal Large Language Models: Yi Xin,

Siqi Luo,

Tianxiang Xu,

Qi Qin,

Haoxing Chen,

Kaiwen Zhu,

Zhiwei Zhang,

Yangfan He,

Rongchao Zhang,

Jinbin Bai,

Shuo Cao,

Bin Fu,

Junjun He,

Yihao Liu,

Yuewen Cao,

Xiaohong Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xin_2026_CVPR,
  author    = {Xin, Yi and Luo, Siqi and Xu, Tianxiang and Qin, Qi and Chen, Haoxing and Zhu, Kaiwen and Zhang, Zhiwei and He, Yangfan and Zhang, Rongchao and Bai, Jinbin and Cao, Shuo and Fu, Bin and He, Junjun and Liu, Yihao and Cao, Yuewen and Liu, Xiaohong},
  title     = {{dMLLM-TTS}: Self-Verified and Efficient Test-Time Scaling for Diffusion Multi-Modal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35726--35735},
}
Learning to See and Act: Task-Aware Virtual View Exploration for Robotic Manipulation: Yongjie Bai,

Zhouxia Wang,

Yang Liu,

Kaijun Luo,

Yifan Wen,

Mingtong Dai,

Weixing Chen,

Ziliang Chen,

Lingbo Liu,

Guanbin Li,

Liang Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Yongjie and Wang, Zhouxia and Liu, Yang and Luo, Kaijun and Wen, Yifan and Dai, Mingtong and Chen, Weixing and Chen, Ziliang and Liu, Lingbo and Li, Guanbin and Lin, Liang},
  title     = {Learning to See and Act: Task-Aware Virtual View Exploration for Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13386--13396},
}
RMAE-ProGRess: Advancing Semantic Segmentation in Unstructured Environments: Manish Bhurtel,

Danda B. Rawat; [pdf] [supp]
[bibtex]
@InProceedings{Bhurtel_2026_CVPR,
  author    = {Bhurtel, Manish and Rawat, Danda B.},
  title     = {{RMAE-ProGRess}: Advancing Semantic Segmentation in Unstructured Environments},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20511--20520},
}
DUET-VLM: Dual stage Unified Efficient Token reduction for VLM Training and Inference: Aditya Kumar Singh,

Hitesh Kandala,

Pratik Prabhanjan Brahma,

Zicheng Liu,

Emad Barsoum; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singh_2026_CVPR,
  author    = {Singh, Aditya Kumar and Kandala, Hitesh and Brahma, Pratik Prabhanjan and Liu, Zicheng and Barsoum, Emad},
  title     = {{DUET-VLM}: Dual stage Unified Efficient Token reduction for {VLM} Training and Inference},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17651--17660},
}
Downscaling Intelligence: Exploring Perception and Reasoning Bottlenecks in Small Multimodal Models: Mark Endo,

Serena Yeung-Levy; [pdf] [supp]
[bibtex]
@InProceedings{Endo_2026_CVPR,
  author    = {Endo, Mark and Yeung-Levy, Serena},
  title     = {Downscaling Intelligence: Exploring Perception and Reasoning Bottlenecks in Small Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {778--788},
}
LAMP: Localization Aware Multi-camera People Tracking in Metric 3D World: Nan Yang,

Julian Straub,

Fan Zhang,

Richard Newcombe,

Jakob Engel,

Lingni Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Nan and Straub, Julian and Zhang, Fan and Newcombe, Richard and Engel, Jakob and Ma, Lingni},
  title     = {{LAMP}: Localization Aware Multi-camera People Tracking in Metric {3D} World},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21208--21220},
}
FPS-Bench: A Benchmark for High Frame-Rate Video Understanding: Rohan Choudhury,

Jean-Sebastien Dandurand,

Kai Qiu,

Kshitij Madhav Bhat,

Kartik Sharma,

Liza Dahiya,

Yizhou Zhao,

Souraja Kundu,

Chun-Hsien Lin,

Kris M. Kitani,

László A. Jeni; [pdf] [supp]
[bibtex]
@InProceedings{Choudhury_2026_CVPR,
  author    = {Choudhury, Rohan and Dandurand, Jean-Sebastien and Qiu, Kai and Bhat, Kshitij Madhav and Sharma, Kartik and Dahiya, Liza and Zhao, Yizhou and Kundu, Souraja and Lin, Chun-Hsien and Kitani, Kris M. and Jeni, L{\'a}szl{\'o} A.},
  title     = {{FPS-Bench}: A Benchmark for High Frame-Rate Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18598--18608},
}
Conan: Progressive Learning to Reason Like a Detective over Multi-Scale Visual Evidence: Kun Ouyang,

Yuanxin Liu,

Linli Yao,

Yishuo Cai,

Hao Zhou,

Fandong Meng,

Jie Zhou,

Xu Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ouyang_2026_CVPR, author = {Ouyang, Kun and Liu, Yuanxin and Yao, Linli and Cai, Yishuo and Zhou, Hao and Meng, Fandong and Zhou, Jie and Sun, Xu}, title = {Conan: Progressive Learning to Reason Like a Detective over Multi-Scale Visual Evidence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41089--41099} }
Points-to-3D: Structure-Aware 3D Generation with Point Cloud Priors: Jiatong Xia,

Zicheng Duan,

Anton van den Hengel,

Lingqiao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR, author = {Xia, Jiatong and Duan, Zicheng and van den Hengel, Anton and Liu, Lingqiao}, title = {{Points-to-3D}: Structure-Aware {3D} Generation with Point Cloud Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {19928--19939} }
Chain-of-Models Pre-Training: Rethinking Training Acceleration of Vision Foundation Models: Jiawei Fan,

Shigeng Wang,

Chao Li,

Xiaolong Liu,

Anbang Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR, author = {Fan, Jiawei and Wang, Shigeng and Li, Chao and Liu, Xiaolong and Yao, Anbang}, title = {Chain-of-Models Pre-Training: Rethinking Training Acceleration of Vision Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {34491--34501} }
Masked Auto-Regressive Variational Acceleration: Fast Inference Makes Practical Reinforcement Learning: Yuxuan Gu,

Weimin Bai,

Yifei Wang,

Weijian Luo,

He Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR, author = {Gu, Yuxuan and Bai, Weimin and Wang, Yifei and Luo, Weijian and Sun, He}, title = {Masked Auto-Regressive Variational Acceleration: Fast Inference Makes Practical Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41881--41891} }
ZoomEarth: Active Perception for Ultra-High-Resolution Geospatial Vision-Language Tasks: Ruixun Liu,

Bowen Fu,

Jiayi Song,

Kaiyu Li,

Wanchen Li,

Lanxuan Xue,

Hui Qiao,

Weizhan Zhang,

Deyu Meng,

Xiangyong Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Ruixun and Fu, Bowen and Song, Jiayi and Li, Kaiyu and Li, Wanchen and Xue, Lanxuan and Qiao, Hui and Zhang, Weizhan and Meng, Deyu and Cao, Xiangyong}, title = {{ZoomEarth}: Active Perception for Ultra-High-Resolution Geospatial Vision-Language Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {34877--34888} }
V-RGBX: Video Editing with Accurate Controls over Intrinsic Properties: Ye Fang,

Tong Wu,

Valentin Deschaintre,

Duygu Ceylan,

Iliyan Georgiev,

Chun-Hao Paul Huang,

Yiwei Hu,

Xuelin Chen,

Tuanfeng Yang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR, author = {Fang, Ye and Wu, Tong and Deschaintre, Valentin and Ceylan, Duygu and Georgiev, Iliyan and Huang, Chun-Hao Paul and Hu, Yiwei and Chen, Xuelin and Wang, Tuanfeng Yang}, title = {{V-RGBX}: Video Editing with Accurate Controls over Intrinsic Properties}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {23182--23192} }
IAFMNet: Information-Aware Feature Modulation for Efficient Super-Resolution: Junwei Xu,

Mengzu Liu,

Zhenyu Wang,

Fangfang Wu,

Sijia Wu,

Tao Huang,

Weisheng Dong; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Junwei and Liu, Mengzu and Wang, Zhenyu and Wu, Fangfang and Wu, Sijia and Huang, Tao and Dong, Weisheng}, title = {{IAFMNet}: Information-Aware Feature Modulation for Efficient Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30564--30573} }
ViLearn: Accelerating Training Convergence of Image-to-3D Generation via Visibility Learning: Rui Chen,

Jianfeng Zhang,

Jing Lin,

Xuanyu Yi,

Yixun Liang,

Guan Luo,

Xiu Li,

Zeming Li,

Ping Tan; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Rui and Zhang, Jianfeng and Lin, Jing and Yi, Xuanyu and Liang, Yixun and Luo, Guan and Li, Xiu and Li, Zeming and Tan, Ping}, title = {{ViLearn}: Accelerating Training Convergence of Image-to-{3D} Generation via Visibility Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {27041--27051} }
FlexiVideo: Variation-Aware Temporal Dynamics Modeling for Efficient Video Understanding: Da Peng,

Xuesong Yang,

Zonghao Guo,

Yichen Zhang,

Chi Chen,

Yidan Zhang,

Yuan Yao,

Fang Wan,

Wei Ke,

Maosong Sun; [pdf] [supp]
[bibtex]
@InProceedings{Peng_2026_CVPR, author = {Peng, Da and Yang, Xuesong and Guo, Zonghao and Zhang, Yichen and Chen, Chi and Zhang, Yidan and Yao, Yuan and Wan, Fang and Ke, Wei and Sun, Maosong}, title = {{FlexiVideo}: Variation-Aware Temporal Dynamics Modeling for Efficient Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9804--9814} }
Seeing Through the Shift: Causality-Inspired Robust Generalized Category Discovery: Wei Feng,

Yiwen Jiang,

Sijin Zhou,

Zhuang Qi,

Zhongxing Xu,

Zhonghua Wang,

Feilong Tang,

Zongyuan Ge; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR, author = {Feng, Wei and Jiang, Yiwen and Zhou, Sijin and Qi, Zhuang and Xu, Zhongxing and Wang, Zhonghua and Tang, Feilong and Ge, Zongyuan}, title = {Seeing Through the Shift: Causality-Inspired Robust Generalized Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {17766--17775} }
PortraitDirector: A Hierarchical Disentanglement Framework for Controllable and Real-time Facial Reenactment: Chaonan Ji,

Jinwei Qi,

Sheng Xu,

Peng Zhang,

Bang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2026_CVPR, author = {Ji, Chaonan and Qi, Jinwei and Xu, Sheng and Zhang, Peng and Zhang, Bang}, title = {{PortraitDirector}: A Hierarchical Disentanglement Framework for Controllable and Real-time Facial Reenactment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32377--32388} }
Vinedresser3D: Towards Agentic Text-guided 3D Editing: Yankuan Chi,

Xiang Li,

Zixuan Huang,

James Matthew Rehg; [pdf] [supp]
[bibtex]
@InProceedings{Chi_2026_CVPR, author = {Chi, Yankuan and Li, Xiang and Huang, Zixuan and Rehg, James Matthew}, title = {{Vinedresser3D}: Towards Agentic Text-guided {3D} Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12673--12683} }
FlashVSR: Towards Real-time Diffusion-Based Streaming Video Super Resolution: Junhao Zhuang,

Shi Guo,

Xin Cai,

Xiaohui Li,

Yihao Liu,

Chun Yuan,

Tianfan Xue; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Junhao and Guo, Shi and Cai, Xin and Li, Xiaohui and Liu, Yihao and Yuan, Chun and Xue, Tianfan}, title = {{FlashVSR}: Towards Real-time Diffusion-Based Streaming Video Super Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43482--43493} }
R-C2: Cycle-Consistent Reinforcement Learning Improves Multimodal Reasoning: Zirui Zhang,

Haoyu Dong,

Kexin Pei,

Chengzhi Mao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zirui and Dong, Haoyu and Pei, Kexin and Mao, Chengzhi}, title = {{R-C2}: Cycle-Consistent Reinforcement Learning Improves Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36893--36903} }
Inference-time Physics Alignment of Video Generative Models with Latent World Models: Jianhao Yuan,

Xiaofeng Zhang,

Felix Friedrich,

Nicolas Beltran-Velez,

Melissa Hall,

Reyhane Askari-Hemmat,

Xiaochuang Han,

Nicolas Ballas,

Michal Drozdzal,

Adriana Romero-Soriano; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2026_CVPR, author = {Yuan, Jianhao and Zhang, Xiaofeng and Friedrich, Felix and Beltran-Velez, Nicolas and Hall, Melissa and Askari-Hemmat, Reyhane and Han, Xiaochuang and Ballas, Nicolas and Drozdzal, Michal and Romero-Soriano, Adriana}, title = {Inference-time Physics Alignment of Video Generative Models with Latent World Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {16118--16129} }
VideoMaMa: Mask-Guided Video Matting via Generative Prior: Sangbeom Lim,

Seoung Wug Oh,

Jiahui Huang,

Heeji Yoon,

Seungryong Kim,

Joon-Young Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lim_2026_CVPR, author = {Lim, Sangbeom and Oh, Seoung Wug and Huang, Jiahui and Yoon, Heeji and Kim, Seungryong and Lee, Joon-Young}, title = {{VideoMaMa}: Mask-Guided Video Matting via Generative Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3845--3855} }
Unlocking Motion from Large Vision Models with a Semantic and Kinematic Duality for Gait Recognition: Zhanbo Huang,

Dingqiang Ye,

Xiaoming Liu,

Yu Kong; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR, author = {Huang, Zhanbo and Ye, Dingqiang and Liu, Xiaoming and Kong, Yu}, title = {Unlocking Motion from Large Vision Models with a Semantic and Kinematic Duality for Gait Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28379--28390} }
Hear What You See: Video-to-Audio Generation with Diffusion Transformer and Semantic-Temporal Alignment-Ranked Direct Preference Optimization: Kai Wang,

Tao Zhou,

Jiayi Lei,

Jing Wang,

Jinman Zhao,

Weiguo Pian,

Yuan Cheng,

Yapeng Tian,

Peng Gao,

Bin Fu,

Yihao Liu,

Dimitrios Hatzinakos,

Yuewen Cao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Kai and Zhou, Tao and Lei, Jiayi and Wang, Jing and Zhao, Jinman and Pian, Weiguo and Cheng, Yuan and Tian, Yapeng and Gao, Peng and Fu, Bin and Liu, Yihao and Hatzinakos, Dimitrios and Cao, Yuewen}, title = {Hear What You See: Video-to-Audio Generation with Diffusion Transformer and Semantic-Temporal Alignment-Ranked Direct Preference Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43396--43406} }
ORD: Object-Relation Decoupling for Generalized 3D Visual Grounding: Ronggang Huang,

Fansen Meng,

Huaidong Zhang,

Xuemiao Xu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR, author = {Huang, Ronggang and Meng, Fansen and Zhang, Huaidong and Xu, Xuemiao}, title = {{ORD}: Object-Relation Decoupling for Generalized {3D} Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30964--30973} }
UCMNet: Uncertainty-Aware Context Memory Network for Under-Display Camera Image Restoration: Daehyun Kim,

Youngmin Kim,

Yoon Ju Oh,

Tae Hyun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Daehyun and Kim, Youngmin and Oh, Yoon Ju and Kim, Tae Hyun}, title = {{UCMNet}: Uncertainty-Aware Context Memory Network for Under-Display Camera Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29939--29948} }
PARSE: Part-Aware Relational Spatial Modeling: Yinuo Bai,

Peijun Xu,

Kuixiang Shao,

Yuyang Jiao,

Jingxuan Zhang,

Kaixin Yao,

Jiayuan Gu,

Jingyi Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2026_CVPR, author = {Bai, Yinuo and Xu, Peijun and Shao, Kuixiang and Jiao, Yuyang and Zhang, Jingxuan and Yao, Kaixin and Gu, Jiayuan and Yu, Jingyi}, title = {{PARSE}: Part-Aware Relational Spatial Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38700--38710} }
SDTrack: A Baseline for Event-based Tracking via Spiking Neural Networks: Yimeng Shan,

Zhenbang Ren,

Haodi Wu,

Wenjie Wei,

Rui-Jie Zhu,

Shuai Wang,

Dehao Zhang,

Yichen Xiao,

Jieyuan Zhang,

Kexin Shi,

Jingzhinan Wang,

Jason K. Eshraghian,

Haicheng Qu,

Malu Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Shan_2026_CVPR, author = {Shan, Yimeng and Ren, Zhenbang and Wu, Haodi and Wei, Wenjie and Zhu, Rui-Jie and Wang, Shuai and Zhang, Dehao and Xiao, Yichen and Zhang, Jieyuan and Shi, Kexin and Wang, Jingzhinan and Eshraghian, Jason K. and Qu, Haicheng and Zhang, Malu}, title = {{SDTrack}: A Baseline for Event-based Tracking via Spiking Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36245--36254} }
UniPercept: A Unified Diffusion Model for Generalizable Visual Perception: Zuyan Zhao,

Zhenliang He,

Meina Kan,

Shiguang Shan,

Xilin Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Zuyan and He, Zhenliang and Kan, Meina and Shan, Shiguang and Chen, Xilin}, title = {{UniPercept}: A Unified Diffusion Model for Generalizable Visual Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43175--43186} }
WebChain: A Large-Scale Human-Annotated Dataset of Real-World Web Interaction Traces: Sicheng Fan,

Rui Wan,

Yifei Leng,

Gaoning Liang,

Li Ling,

Yanyi Shang,

Dehan Kong; [pdf] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR, author = {Fan, Sicheng and Wan, Rui and Leng, Yifei and Liang, Gaoning and Ling, Li and Shang, Yanyi and Kong, Dehan}, title = {{WebChain}: A Large-Scale Human-Annotated Dataset of Real-World Web Interaction Traces}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6209--6218} }
Probabilistic Precipitation Nowcasting with Rectified Flow Transformers: Johannes Schusterbauer,

Jannik Wiese,

Nick Stracke,

Timy Phan,

Björn Ommer; [pdf] [supp]
[bibtex]
@InProceedings{Schusterbauer_2026_CVPR, author = {Schusterbauer, Johannes and Wiese, Jannik and Stracke, Nick and Phan, Timy and Ommer, Bj{\"o}rn}, title = {Probabilistic Precipitation Nowcasting with Rectified Flow Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {25742--25756} }
PR-MaGIC: Prompt Refinement Via Mask Decoder Gradient Flow For In-Context Segmentation: Minjae Lee,

Sungwoo Hur,

Soojin Hwang,

Won Hwa Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR, author = {Lee, Minjae and Hur, Sungwoo and Hwang, Soojin and Kim, Won Hwa}, title = {{PR-MaGIC}: Prompt Refinement Via Mask Decoder Gradient Flow For In-Context Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {21659--21668} }
PixDLM: A Dual-Path Multimodal Language Model for UAV Reasoning Segmentation: Shuyan Ke,

Yifan Mei,

Changli Wu,

Yonghan Zheng,

Jiayi Ji,

Liujuan Cao,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2026_CVPR, author = {Ke, Shuyan and Mei, Yifan and Wu, Changli and Zheng, Yonghan and Ji, Jiayi and Cao, Liujuan and Ji, Rongrong}, title = {{PixDLM}: A Dual-Path Multimodal Language Model for {UAV} Reasoning Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {26165--26175} }
BrepVGAE: Variational Graph Autoencoder with Unified Latent Representation for B-rep: Hao Guo,

Liyuan Deng,

Yongkang Dai,

Ruohan Wang,

Jiahao Li,

Yunpeng Bai,

Yilei Shi; [pdf]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Hao and Deng, Liyuan and Dai, Yongkang and Wang, Ruohan and Li, Jiahao and Bai, Yunpeng and Shi, Yilei}, title = {{BrepVGAE}: Variational Graph Autoencoder with Unified Latent Representation for {B-rep}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39230--39238} }
EvoComp: Learning Visual Token Compression for Multimodal Large Language Models via Semantic-Guided Evolutionary Labeling: Jiafei Song,

Fengwei Zhou,

Jin Qu,

Wenjin Jason Li,

Tong Wu,

Gengjian Xue,

Zhikang Zhao,

Daomin Wei,

Yichao Lu,

Bailin Na; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR, author = {Song, Jiafei and Zhou, Fengwei and Qu, Jin and Li, Wenjin Jason and Wu, Tong and Xue, Gengjian and Zhao, Zhikang and Wei, Daomin and Lu, Yichao and Na, Bailin}, title = {{EvoComp}: Learning Visual Token Compression for Multimodal Large Language Models via Semantic-Guided Evolutionary Labeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3532--3542} }
Vibe Spaces for Creatively Connecting and Expressing Visual Concepts: Huzheng Yang,

Katherine Xu,

Andrew Lu,

Michael D. Grossberg,

Yutong Bai,

Jianbo Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Huzheng and Xu, Katherine and Lu, Andrew and Grossberg, Michael D. and Bai, Yutong and Shi, Jianbo}, title = {Vibe Spaces for Creatively Connecting and Expressing Visual Concepts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {21912--21921} }
The Golden Subspace: Where Efficiency Meets Generalization in Continual Test-Time Adaptation: Guannan Lai,

Da-Wei Zhou,

Zhenguo Li,

Han-Jia Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2026_CVPR, author = {Lai, Guannan and Zhou, Da-Wei and Li, Zhenguo and Ye, Han-Jia}, title = {The Golden Subspace: Where Efficiency Meets Generalization in Continual Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3866--3875} }
Dual-level Adaptation for Multi-Object Tracking: Building Test-Time Calibration from Experience and Intuition: Wen Guo,

Pengfei Zhao,

Zongmeng Wang,

Yufan Hu,

Junyu Gao; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Wen and Zhao, Pengfei and Wang, Zongmeng and Hu, Yufan and Gao, Junyu}, title = {Dual-level Adaptation for Multi-Object Tracking: Building Test-Time Calibration from Experience and Intuition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28190--28200} }
DynFusion: Rethinking Condition Fusion for Adaptive Multi-Conditional Text-to-Image Generation: Zheng Fang,

Lichuan Xiang,

Xu Cai,

Bing Wang,

Bo Yang,

Hongkai Wen; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2026_CVPR, author = {Fang, Zheng and Xiang, Lichuan and Cai, Xu and Wang, Bing and Yang, Bo and Wen, Hongkai}, title = {{DynFusion}: Rethinking Condition Fusion for Adaptive Multi-Conditional Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29103--29113} }
Long-SCOPE: Fully Sparse Long-Range Cooperative 3D Perception: Jiahao Wang,

Zikun Xu,

Yuner Zhang,

Zhongwei Jiang,

Chenyang Lu,

Shuocheng Yang,

Yuxuan Wang,

Jiaru Zhong,

Chuang Zhang,

Shaobing Xu,

Jianqiang Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Jiahao and Xu, Zikun and Zhang, Yuner and Jiang, Zhongwei and Lu, Chenyang and Yang, Shuocheng and Wang, Yuxuan and Zhong, Jiaru and Zhang, Chuang and Xu, Shaobing and Wang, Jianqiang}, title = {{Long-SCOPE}: Fully Sparse Long-Range Cooperative {3D} Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11599--11609} }
LongVT: Incentivizing "Thinking with Long Videos" via Native Tool Calling: Zuhao Yang,

Sudong Wang,

Kaichen Zhang,

Keming Wu,

Sicong Leng,

Yifan Zhang,

Bo Li,

Chengwei Qin,

Shijian Lu,

Xingxuan Li,

Lidong Bing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Zuhao and Wang, Sudong and Zhang, Kaichen and Wu, Keming and Leng, Sicong and Zhang, Yifan and Li, Bo and Qin, Chengwei and Lu, Shijian and Li, Xingxuan and Bing, Lidong}, title = {{LongVT}: Incentivizing ``Thinking with Long Videos'' via Native Tool Calling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33816--33826} }
Elucidating the SNR-t Bias of Diffusion Probabilistic Models: Meng Yu,

Lei Sun,

Jianhao Zeng,

Xiangxiang Chu,

Kun Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR, author = {Yu, Meng and Sun, Lei and Zeng, Jianhao and Chu, Xiangxiang and Zhan, Kun}, title = {Elucidating the {SNR-t} Bias of Diffusion Probabilistic Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43461--43470} }
Momentum Memory for Knowledge Distillation in Computational Pathology: Yongxin Guo,

Hao Lu,

Onur C. Koyun,

Zhengjie Zhu,

Muhammet F. Demir,

Metin N. Gurcan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR, author = {Guo, Yongxin and Lu, Hao and Koyun, Onur C. and Zhu, Zhengjie and Demir, Muhammet F. and Gurcan, Metin N.}, title = {Momentum Memory for Knowledge Distillation in Computational Pathology}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6889--6899} }
Revisiting Token Compression for Accelerating ViT-based Sparse Multi-View 3D Object Detectors: Mingqian Ji,

Shanshan Zhang,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2026_CVPR, author = {Ji, Mingqian and Zhang, Shanshan and Yang, Jian}, title = {Revisiting Token Compression for Accelerating {ViT}-based Sparse Multi-View {3D} Object Detectors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {18776--18785} }
Efficiently Reconstructing Dynamic Scenes One D4RT at a Time: Chuhan Zhang,

Guillaume Le Moing,

Skanda Koppula,

Ignacio Rocco,

Liliane Momeni,

Junyu Xie,

Shuyang Sun,

Rahul Sukthankar,

Joëlle K. Barral,

Raia Hadsell,

Zoubin Ghahramani,

Andrew Zisserman,

Junlin Zhang,

Mehdi S. M. Sajjadi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chuhan and Le Moing, Guillaume and Koppula, Skanda and Rocco, Ignacio and Momeni, Liliane and Xie, Junyu and Sun, Shuyang and Sukthankar, Rahul and Barral, Jo{\"e}lle K. and Hadsell, Raia and Ghahramani, Zoubin and Zisserman, Andrew and Zhang, Junlin and Sajjadi, Mehdi S. M.}, title = {Efficiently Reconstructing Dynamic Scenes One {D4RT} at a Time}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7382--7392} }
CD-Buffer: Complementary Dual-Buffer Framework for Test-Time Adaptation in Adverse Weather Object Detection: Youngjun Song,

Hyeongyu Kim,

Dosik Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR, author = {Song, Youngjun and Kim, Hyeongyu and Hwang, Dosik}, title = {{CD-Buffer}: Complementary Dual-Buffer Framework for Test-Time Adaptation in Adverse Weather Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {15050--15059} }
Ref4D-VideoBench: Four-Dimensional Reference-Based Evaluation of Text-to-Video Generative Models: Jiajia Wei,

Yujia He,

Yuhan Hou,

Hang Qi,

Sihua Wang,

Jincheng Shi,

Kwok Fung Li,

Zibin Zheng,

Weibin Wu; [pdf]
[bibtex]
@InProceedings{Wei_2026_CVPR, author = {Wei, Jiajia and He, Yujia and Hou, Yuhan and Qi, Hang and Wang, Sihua and Shi, Jincheng and Li, Kwok Fung and Zheng, Zibin and Wu, Weibin}, title = {{Ref4D-VideoBench}: Four-Dimensional Reference-Based Evaluation of Text-to-Video Generative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7719--7729} }
ChArtist: Generating Pictorial Charts with Unified Spatial and Subject Control: Shishi Xiao,

Tongyu Zhou,

David H. Laidlaw,

Gromit Yeuk-Yin Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR, author = {Xiao, Shishi and Zhou, Tongyu and Laidlaw, David H. and Chan, Gromit Yeuk-Yin}, title = {{ChArtist}: Generating Pictorial Charts with Unified Spatial and Subject Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29211--29221} }
TeHOR: Text-Guided 3D Human and Object Reconstruction with Textures: Hyeongjin Nam,

Daniel Sungho Jung,

Kyoung Mu Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2026_CVPR, author = {Nam, Hyeongjin and Jung, Daniel Sungho and Lee, Kyoung Mu}, title = {{TeHOR}: Text-Guided {3D} Human and Object Reconstruction with Textures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7100--7110} }
Adaptive Video Distillation: Mitigating Oversaturation and Temporal Collapse in Few-Step Generation: Yuyang You,

Yongzhi Li,

Jiahui Li,

Yadong Mu,

Quan Chen,

Peng Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2026_CVPR, author = {You, Yuyang and Li, Yongzhi and Li, Jiahui and Mu, Yadong and Chen, Quan and Jiang, Peng}, title = {Adaptive Video Distillation: Mitigating Oversaturation and Temporal Collapse in Few-Step Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43429--43439} }
Boosting Document Parsing Efficiency and Performance with Coarse-to-Fine Visual Processing: Cheng Cui,

Ting Sun,

Suyin Liang,

Tingquan Gao,

Zelun Zhang,

Jiaxuan Liu,

Xueqing Wang,

Changda Zhou,

Hongen Liu,

Manhui Lin,

Yue Zhang,

Yubo Zhang,

Jing Zhang,

Jun Zhang,

Xing Wei,

Yi Liu,

Dianhai Yu,

Yanjun Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2026_CVPR, author = {Cui, Cheng and Sun, Ting and Liang, Suyin and Gao, Tingquan and Zhang, Zelun and Liu, Jiaxuan and Wang, Xueqing and Zhou, Changda and Liu, Hongen and Lin, Manhui and Zhang, Yue and Zhang, Yubo and Zhang, Jing and Zhang, Jun and Wei, Xing and Liu, Yi and Yu, Dianhai and Ma, Yanjun}, title = {Boosting Document Parsing Efficiency and Performance with Coarse-to-Fine Visual Processing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {16655--16665} }
Divide, Conquer, and Aggregate: Asymmetric Experts for Class-Imbalanced Semi-Supervised Medical Image Segmentation: Yajun Liu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Yajun}, title = {Divide, Conquer, and Aggregate: Asymmetric Experts for Class-Imbalanced Semi-Supervised Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8503--8513} }
FlashPortrait: 6x Faster Infinite Portrait Animation with Adaptive Latent Prediction: Shuyuan Tu,

Yueming Pan,

Yinming Huang,

Xintong Han,

Zhen Xing,

Qi Dai,

Kai Qiu,

Chong Luo,

Zuxuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tu_2026_CVPR, author = {Tu, Shuyuan and Pan, Yueming and Huang, Yinming and Han, Xintong and Xing, Zhen and Dai, Qi and Qiu, Kai and Luo, Chong and Wu, Zuxuan}, title = {{FlashPortrait}: 6x Faster Infinite Portrait Animation with Adaptive Latent Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {25163--25173} }
Dynamic Logits Adjustment and Exploration for Test-Time Adaptation in Vision Language Models: Haoyan Wu,

Yahao Liu,

Yinjie Lei,

Lixin Duan,

Wen Li; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Haoyan and Liu, Yahao and Lei, Yinjie and Duan, Lixin and Li, Wen}, title = {Dynamic Logits Adjustment and Exploration for Test-Time Adaptation in Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3143--3153} }
Interpretable Cross-Domain Few-Shot Learning with Rectified Target-Domain Local Alignment: Yaze Zhao,

Yixiong Zou,

Yuhua Li,

Ruixuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yaze and Zou, Yixiong and Li, Yuhua and Li, Ruixuan}, title = {Interpretable Cross-Domain Few-Shot Learning with Rectified Target-Domain Local Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41605--41615} }
Structural Action Transformer for 3D Dexterous Manipulation: Xiaohan Lei,

Min Wang,

Bohong Weng,

Wengang Zhou,

Houqiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2026_CVPR, author = {Lei, Xiaohan and Wang, Min and Weng, Bohong and Zhou, Wengang and Li, Houqiang}, title = {Structural Action Transformer for {3D} Dexterous Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28807--28818} }
SECOS: Semantic Capture for Rigorous Classification in Open-World Semi-Supervised Learning: Hezhao Liu,

Jiacheng Yang,

Junlong Gao,

Mengke Li,

Yiqun Zhang,

Shreyank N Gowda,

Yang Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Hezhao and Yang, Jiacheng and Gao, Junlong and Li, Mengke and Zhang, Yiqun and Gowda, Shreyank N and Lu, Yang}, title = {{SECOS}: Semantic Capture for Rigorous Classification in Open-World Semi-Supervised Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39627--39636} }
EffectMaker: Unifying Reasoning and Generation for Customized Visual Effect Creation: Shiyuan Yang,

Ruihuang Li,

Jiale Tao,

Shuai Shao,

Qinglin Lu,

Jing Liao; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Shiyuan and Li, Ruihuang and Tao, Jiale and Shao, Shuai and Lu, Qinglin and Liao, Jing}, title = {{EffectMaker}: Unifying Reasoning and Generation for Customized Visual Effect Creation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {16108--16117} }
CARE What Fails: Contrastive Anchored-REflection for Verifiable Multimodal Reasoning: Yongxin Wang,

Zhicheng Yang,

Meng Cao,

Mingfei Han,

Haokun Lin,

Yingying Zhu,

Xiaojun Chang,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yongxin and Yang, Zhicheng and Cao, Meng and Han, Mingfei and Lin, Haokun and Zhu, Yingying and Chang, Xiaojun and Liang, Xiaodan},
  title     = {{CARE} What Fails: Contrastive {Anchored-REflection} for Verifiable Multimodal Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11976--11986},
}
SOTA: Self-adaptive Optimal Transport for Zero-Shot Classification with Multiple Foundation Models: Zhanxuan Hu,

Qiyu Xu,

Yu Duan,

Yonghang Tai,

Huafeng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Zhanxuan and Xu, Qiyu and Duan, Yu and Tai, Yonghang and Li, Huafeng},
  title     = {{SOTA}: Self-adaptive Optimal Transport for Zero-Shot Classification with Multiple Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26624--26634},
}
Mechanisms of Object Localization in Vision-Language Models: Timothy Schaumlöffel,

Martina G. Vilas,

Gemma Roig; [pdf] [supp]
[bibtex]
@InProceedings{Schaumloffel_2026_CVPR,
  author    = {Schauml{\"o}ffel, Timothy and Vilas, Martina G. and Roig, Gemma},
  title     = {Mechanisms of Object Localization in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31356--31365},
}
Bridge: Basis-Driven Causal Inference Marries VFMs for Domain Generalization: Mingbo Hong,

Feng Liu,

Caroline Gevaert,

George Vosselman,

Hao Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Mingbo and Liu, Feng and Gevaert, Caroline and Vosselman, George and Cheng, Hao},
  title     = {Bridge: Basis-Driven Causal Inference Marries {VFMs} for Domain Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31962--31973},
}
Dual-Level Confidence based Implicit Self-Refinement for Medical Visual Question Answering: Meihong Pan,

Yefeng Zheng; [pdf]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Meihong and Zheng, Yefeng},
  title     = {Dual-Level Confidence based Implicit Self-Refinement for Medical Visual Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17215--17225},
}
Beyond Tie Points: Satellite Image Block Adjustment based on Dense Feature Consistency: Yi Liu,

Yi Wan,

Lei Yu,

Panwang Xia,

Qiong Wu,

Yingying Pei,

Xuejun Huang,

Junjian Zhang,

Xiangyuan Cai,

Hongwei Hu,

Yongjun Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yi and Wan, Yi and Yu, Lei and Xia, Panwang and Wu, Qiong and Pei, Yingying and Huang, Xuejun and Zhang, Junjian and Cai, Xiangyuan and Hu, Hongwei and Zhang, Yongjun},
  title     = {Beyond Tie Points: Satellite Image Block Adjustment based on Dense Feature Consistency},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6443--6452},
}
Video-as-Answer: Predict and Generate Next Video Event with Joint-GRPO: Junhao Cheng,

Liang Hou,

Xin Tao,

Jing Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Junhao and Hou, Liang and Tao, Xin and Liao, Jing},
  title     = {{Video-as-Answer}: Predict and Generate Next Video Event with {Joint-GRPO}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38915--38925},
}
Refer-Agent: A Collaborative Multi-Agent System with Reasoning and Reflection for Referring Video Object Segmentation: Haichao Jiang,

Tianming Liang,

Wei-Shi Zheng,

Jian-Fang Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Haichao and Liang, Tianming and Zheng, Wei-Shi and Hu, Jian-Fang},
  title     = {{Refer-Agent}: A Collaborative Multi-Agent System with Reasoning and Reflection for Referring Video Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39508--39517},
}
SkeletonContext: Skeleton-side Context Prompt Learning for Zero-Shot Skeleton-based Action Recognition: Ning Wang,

Tieyue Wu,

Naeha Sharif,

Farid Boussaid,

Guangming Zhu,

Lin Mei,

Mohammed Bennamoun,

Liang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ning and Wu, Tieyue and Sharif, Naeha and Boussaid, Farid and Zhu, Guangming and Mei, Lin and Bennamoun, Mohammed and Zhang, Liang},
  title     = {{SkeletonContext}: Skeleton-side Context Prompt Learning for Zero-Shot Skeleton-based Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20170--20180},
}
KVSmooth: Mitigating Hallucination in Multi-modal Large Language Models through Key-Value Smoothing: Siyu Jiang,

Feiyang Chen,

Xiaojin Zhang,

Kun He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Siyu and Chen, Feiyang and Zhang, Xiaojin and He, Kun},
  title     = {{KVSmooth}: Mitigating Hallucination in Multi-modal Large Language Models through Key-Value Smoothing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32561--32571},
}
Extend3D: Town-Scale 3D Generation: Seungwoo Yoon,

Jinmo Kim,

Jaesik Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoon_2026_CVPR,
  author    = {Yoon, Seungwoo and Kim, Jinmo and Park, Jaesik},
  title     = {{Extend3D}: Town-Scale {3D} Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5892--5901},
}
PaCo-RL: Advancing Reinforcement Learning for Consistent Image Generation with Pairwise Reward Modeling: Bowen Ping,

Chengyou Jia,

Minnan Luo,

Changliang Xia,

Xin Shen,

Zhuohang Dang,

Hangwei Qian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ping_2026_CVPR,
  author    = {Ping, Bowen and Jia, Chengyou and Luo, Minnan and Xia, Changliang and Shen, Xin and Dang, Zhuohang and Qian, Hangwei},
  title     = {{PaCo-RL}: Advancing Reinforcement Learning for Consistent Image Generation with Pairwise Reward Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34353--34363},
}
A Style is Worth One Code: Unlocking Code-to-Style Image Generation with Discrete Style Space: Huijie Liu,

Shuhao Cui,

Haoxiang Cao,

Shuai Ma,

Kai Wu,

Guoliang Kang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Huijie and Cui, Shuhao and Cao, Haoxiang and Ma, Shuai and Wu, Kai and Kang, Guoliang},
  title     = {A Style is Worth One Code: Unlocking Code-to-Style Image Generation with Discrete Style Space},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1--10},
}
Mitigating Multimodal Hallucinations via Gradient-based Self-Reflection: Shan Wang,

Maying Shen,

Nadine Chang,

Chuong Nguyen,

Hongdong Li,

Jose M. Alvarez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shan and Shen, Maying and Chang, Nadine and Nguyen, Chuong and Li, Hongdong and Alvarez, Jose M.},
  title     = {Mitigating Multimodal Hallucinations via Gradient-based Self-Reflection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32539--32549},
}
GenHOI: Towards Object-Consistent Hand-Object Interaction with Temporally Balanced and Spatially Selective Object Injection: Xuan Huang,

Mochu Xiang,

Zhelun Shen,

Jinbo Wu,

Chenming Wu,

Chen Zhao,

Kaisiyuan Wang,

Hang Zhou,

Shanshan Liu,

Haocheng Feng,

Wei He,

Jingdong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Xuan and Xiang, Mochu and Shen, Zhelun and Wu, Jinbo and Wu, Chenming and Zhao, Chen and Wang, Kaisiyuan and Zhou, Hang and Liu, Shanshan and Feng, Haocheng and He, Wei and Wang, Jingdong},
  title     = {{GenHOI}: Towards Object-Consistent Hand-Object Interaction with Temporally Balanced and Spatially Selective Object Injection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23117--23127},
}
Heuristic Self-Paced Learning for Domain Adaptive Semantic Segmentation under Adverse Conditions: Shiqin Wang,

Haoyang Chen,

Huaizhou Huang,

Yinkan He,

Dongfang Sun,

Xiaoqing Chen,

Xingyu Liu,

Zheng Wang,

Kaiyan Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shiqin and Chen, Haoyang and Huang, Huaizhou and He, Yinkan and Sun, Dongfang and Chen, Xiaoqing and Liu, Xingyu and Wang, Zheng and Zhao, Kaiyan},
  title     = {Heuristic Self-Paced Learning for Domain Adaptive Semantic Segmentation under Adverse Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3815--3824},
}
GeoPredict: Leveraging Predictive Kinematics and 3D Gaussian Geometry for Precise VLA Manipulation: Jingjing Qian,

Boyao Han,

Chen Shi,

Lei Xiao,

Long Yang,

Shaoshuai Shi,

Li Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Jingjing and Han, Boyao and Shi, Chen and Xiao, Lei and Yang, Long and Shi, Shaoshuai and Jiang, Li},
  title     = {{GeoPredict}: Leveraging Predictive Kinematics and {3D} {Gaussian} Geometry for Precise {VLA} Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13529--13539},
}
INSIGHT Bench: Towards Grounded IN-SItu Guidance for Robotic ManipulaTion: Seonho Kim,

Junhyeong Hong,

Kyungjae Lee,

Yoonseon Oh; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Seonho and Hong, Junhyeong and Lee, Kyungjae and Oh, Yoonseon},
  title     = {{INSIGHT} {Bench}: Towards Grounded {IN-SItu} Guidance for Robotic {ManipulaTion}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35070--35079},
}
PoseGAM: Robust Unseen Object Pose Estimation via Geometry-Aware Multi-View Reasoning: Jianqi Chen,

Biao Zhang,

Xiangjun Tang,

Peter Wonka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jianqi and Zhang, Biao and Tang, Xiangjun and Wonka, Peter},
  title     = {{PoseGAM}: Robust Unseen Object Pose Estimation via Geometry-Aware Multi-View Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7197--7208},
}
Event-based Motion Deblurring with Unpaired Data: Hoonhee Cho,

Yuhwan Jeong,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Cho_2026_CVPR,
  author    = {Cho, Hoonhee and Jeong, Yuhwan and Yoon, Kuk-Jin},
  title     = {Event-based Motion Deblurring with Unpaired Data},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {882--891},
}
DualMirage: Hunting Stealthy Multimodal LLM Agents via CAPTCHAs with Contour and Adversarial Illusions: Bei Chen,

Gaolei Li,

Jun Wu,

Jianhua Li; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Bei and Li, Gaolei and Wu, Jun and Li, Jianhua},
  title     = {{DualMirage}: Hunting Stealthy Multimodal {LLM} Agents via {CAPTCHAs} with Contour and Adversarial Illusions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1523--1532},
}
VGGT-Det: Mining VGGT Internal Priors for Sensor-Geometry-Free Multi-View Indoor 3D Object Detection: Yang Cao,

Feize Wu,

Dave Zhenyu Chen,

Yingji Zhong,

Lanqing Hong,

Dan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Yang and Wu, Feize and Chen, Dave Zhenyu and Zhong, Yingji and Hong, Lanqing and Xu, Dan},
  title     = {{VGGT-Det}: Mining {VGGT} Internal Priors for Sensor-Geometry-Free Multi-View Indoor {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4708--4717},
}
Hugging Visual Prompt and Segmentation Tokens: Consistency Learning for Fine-Grained Visual Understanding in MLLMs: Jing Yang,

Sen Yang,

Boqiang Duan,

Ming Dai,

Wei Zhang,

Xiao Tan,

Kunbin Chen,

Wei He,

Jingdong Wang,

Hanli Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Jing and Yang, Sen and Duan, Boqiang and Dai, Ming and Zhang, Wei and Tan, Xiao and Chen, Kunbin and He, Wei and Wang, Jingdong and Wang, Hanli},
  title     = {Hugging Visual Prompt and Segmentation Tokens: Consistency Learning for Fine-Grained Visual Understanding in {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5175--5186},
}
From Scale to Speed: Adaptive Test-Time Scaling for Image Editing: Xiangyan Qu,

Zhenlong Yuan,

Jing Tang,

Rui Chen,

Datao Tang,

Meng Yu,

Lei Sun,

Yancheng Bai,

Xiangxiang Chu,

Gaopeng Gou,

Gang Xiong,

Yujun Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2026_CVPR,
  author    = {Qu, Xiangyan and Yuan, Zhenlong and Tang, Jing and Chen, Rui and Tang, Datao and Yu, Meng and Sun, Lei and Bai, Yancheng and Chu, Xiangxiang and Gou, Gaopeng and Xiong, Gang and Cai, Yujun},
  title     = {From Scale to Speed: Adaptive Test-Time Scaling for Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23272--23282},
}
FSLoRA: Harmonizing Detection and Re-Identification via Freq-Spatial Low-Rank Adapter for One-Stage Person Search: Yanling Tian,

Shanshan Zhang,

Di Chen,

Jian Yang; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2026_CVPR,
  author    = {Tian, Yanling and Zhang, Shanshan and Chen, Di and Yang, Jian},
  title     = {{FSLoRA}: Harmonizing Detection and Re-Identification via Freq-Spatial Low-Rank Adapter for One-Stage Person Search},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40428--40437},
}
Learning to Track Instance from Single Nature Language Description: Yaozong Zheng,

Bineng Zhong,

Qihua Liang,

Shuimu Zeng,

Haiying Xia,

Shuxiang Song; [pdf] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Yaozong and Zhong, Bineng and Liang, Qihua and Zeng, Shuimu and Xia, Haiying and Song, Shuxiang},
  title     = {Learning to Track Instance from Single Nature Language Description},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20922--20931},
}
Concept-Aware LoRA for Domain-Aligned Segmentation Dataset Generation: Minho Park,

Sunghyun Park,

Jungsoo Lee,

Hyojin Park,

Kyuwoong Hwang,

Fatih Porikli,

Jaegul Choo,

Sungha Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Minho and Park, Sunghyun and Lee, Jungsoo and Park, Hyojin and Hwang, Kyuwoong and Porikli, Fatih and Choo, Jaegul and Choi, Sungha},
  title     = {Concept-Aware {LoRA} for Domain-Aligned Segmentation Dataset Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39858--39868},
}
UniPR: Unified Object-level Real-to-Sim Perception and Reconstruction from a Single Stereo Pair: Chuanrui Zhang,

Yingshuang Zou,

ZhengXian Wu,

Yonggen Ling,

Yuxiao Yang,

Ziwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chuanrui and Zou, Yingshuang and Wu, ZhengXian and Ling, Yonggen and Yang, Yuxiao and Wang, Ziwei},
  title     = {{UniPR}: Unified Object-level Real-to-Sim Perception and Reconstruction from a Single Stereo Pair},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4667--4676},
}
DiffDecompose: Layer-Wise Decomposition of Alpha-Composited Images via Diffusion Transformers: Zitong Wang,

Hang Zhao,

Qianyu Zhou,

Xuequan Lu,

Xiangtai Li,

Hao Yang,

Bo Yang,

Yiren Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zitong and Zhao, Hang and Zhou, Qianyu and Lu, Xuequan and Li, Xiangtai and Yang, Hao and Yang, Bo and Song, Yiren},
  title     = {{DiffDecompose}: Layer-Wise Decomposition of Alpha-Composited Images via Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4624--4634},
}
POINTS-Long: Adaptive Dual-Mode Visual Reasoning in MLLMs: Haicheng Wang,

Yuan Liu,

Yikun Liu,

Zhemeng Yu,

Zhongyin Zhao,

Yangxiu You,

Zilin Yu,

Le Tian,

Zhou Xiao,

Jie Zhou,

Weidi Xie,

Yanfeng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Haicheng and Liu, Yuan and Liu, Yikun and Yu, Zhemeng and Zhao, Zhongyin and You, Yangxiu and Yu, Zilin and Tian, Le and Xiao, Zhou and Zhou, Jie and Xie, Weidi and Wang, Yanfeng},
  title     = {{POINTS-Long}: Adaptive Dual-Mode Visual Reasoning in {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19119--19131},
}
VISion On Request: Enhanced VLLM efficiency with sparse, dynamically selected, vision-language interactions: Adrian Bulat,

Alberto Baldrati,

Ioannis Maniadis Metaxas,

Yassine Ouali,

Georgios Tzimiropoulos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bulat_2026_CVPR,
  author    = {Bulat, Adrian and Baldrati, Alberto and Metaxas, Ioannis Maniadis and Ouali, Yassine and Tzimiropoulos, Georgios},
  title     = {{VISion} {On} {Request}: Enhanced {VLLM} efficiency with sparse, dynamically selected, vision-language interactions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31920--31930},
}
HiLoRA: Hierarchical Low-Rank Adaptation for Personalized Federated Learning: Zihao Peng,

Nan Zou,

Jiandian Zeng,

Guo Li,

Ke Chen,

Boyuan Li,

Tian Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Zihao and Zou, Nan and Zeng, Jiandian and Li, Guo and Chen, Ke and Li, Boyuan and Wang, Tian},
  title     = {{HiLoRA}: Hierarchical Low-Rank Adaptation for Personalized Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31746--31757},
}
ERMoE: Eigen-Reparameterized Mixture-of-Experts for Stable Routing and Interpretable Specialization: Anzhe Cheng,

Shukai Duan,

Shixuan Li,

Chenzhong Yin,

Mingxi Cheng,

Heng Ping,

Tamoghna Chattopadhyay,

Sophia I. Thomopoulos,

Shahin Nazarian,

Paul Thompson,

Paul Bogdan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Anzhe and Duan, Shukai and Li, Shixuan and Yin, Chenzhong and Cheng, Mingxi and Ping, Heng and Chattopadhyay, Tamoghna and Thomopoulos, Sophia I. and Nazarian, Shahin and Thompson, Paul and Bogdan, Paul},
  title     = {{ERMoE}: Eigen-Reparameterized Mixture-of-Experts for Stable Routing and Interpretable Specialization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12997--13006},
}
EventDrive: Event Cameras for Vision-Language Driving Intelligence: Dongyue Lu,

Rong Li,

Ao Liang,

Lingdong Kong,

Wei Yin,

Lai Xing Ng,

Benoit R. Cottereau,

Camille Simon Chane,

Wei Tsang Ooi; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Dongyue and Li, Rong and Liang, Ao and Kong, Lingdong and Yin, Wei and Ng, Lai Xing and Cottereau, Benoit R. and Chane, Camille Simon and Ooi, Wei Tsang},
  title     = {{EventDrive}: Event Cameras for Vision-Language Driving Intelligence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22312--22322},
}
Counterfactual VLA: Self-Reflective Vision-Language-Action Model with Adaptive Reasoning: Zhenghao Peng,

Wenhao Ding,

Yurong You,

Yuxiao Chen,

Wenjie Luo,

Thomas Tian,

Yulong Cao,

Apoorva Sharma,

Danfei Xu,

Boris Ivanovic,

Boyi Li,

Yan Wang,

Marco Pavone; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Zhenghao and Ding, Wenhao and You, Yurong and Chen, Yuxiao and Luo, Wenjie and Tian, Thomas and Cao, Yulong and Sharma, Apoorva and Xu, Danfei and Ivanovic, Boris and Li, Boyi and Wang, Yan and Pavone, Marco},
  title     = {Counterfactual {VLA}: Self-Reflective Vision-Language-Action Model with Adaptive Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4022--4031},
}
TrafficAlign: Aligning Large Language Models for Traffic Scenario Generation: Zhi Tu,

Liangkun Niu,

Tianyi Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Tu_2026_CVPR,
  author    = {Tu, Zhi and Niu, Liangkun and Zhang, Tianyi},
  title     = {{TrafficAlign}: Aligning Large Language Models for Traffic Scenario Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39744--39754},
}
Reward Sharpness-Aware Fine-Tuning for Diffusion Models: Kwanyoung Kim,

Byeongsu Sim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Kwanyoung and Sim, Byeongsu},
  title     = {Reward Sharpness-Aware Fine-Tuning for Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36051--36061},
}
SGDE: Self-supervised Geometry Degradation Estimation Framework for Coded Aperture Compressive Spectral Imaging: Yuqiao He,

Xiaoyan Liu,

Jianxu Mao,

Yaonan Wang,

Hui Zhang,

Lizhu Liu,

Yurong Chen,

Wenbin He; [pdf] [supp]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yuqiao and Liu, Xiaoyan and Mao, Jianxu and Wang, Yaonan and Zhang, Hui and Liu, Lizhu and Chen, Yurong and He, Wenbin},
  title     = {{SGDE}: Self-supervised Geometry Degradation Estimation Framework for Coded Aperture Compressive Spectral Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34084--34094},
}
Evaluating Generative Models via One-Dimensional Code Distributions: Zexi Jia,

Pengcheng Luo,

Yijia Zhong,

Jinchao Zhang,

Jie Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Zexi and Luo, Pengcheng and Zhong, Yijia and Zhang, Jinchao and Zhou, Jie},
  title     = {Evaluating Generative Models via One-Dimensional Code Distributions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17077--17086},
}
Dynamic Momentum Recalibration in Online Gradient Learning: Zhipeng Yao,

Rui Yu,

Guisong Chang,

Ying Li,

Yu Zhang,

Dazhou Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2026_CVPR,
  author    = {Yao, Zhipeng and Yu, Rui and Chang, Guisong and Li, Ying and Zhang, Yu and Li, Dazhou},
  title     = {Dynamic Momentum Recalibration in Online Gradient Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12902--12912},
}
AG-VAS: Anchor-Guided Zero-Shot Visual Anomaly Segmentation with Large Multimodal Models: Zhen Qu,

Xian Tao,

Xiaoyi Bao,

Dingrong Wang,

ShiChen Qu,

Zhengtao Zhang,

Xingang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2026_CVPR,
  author    = {Qu, Zhen and Tao, Xian and Bao, Xiaoyi and Wang, Dingrong and Qu, ShiChen and Zhang, Zhengtao and Wang, Xingang},
  title     = {{AG-VAS}: Anchor-Guided Zero-Shot Visual Anomaly Segmentation with Large Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14126--14136},
}
MindPower: Enabling Theory-of-Mind Reasoning in VLM-based Embodied Agents: Ruoxuan Zhang,

Qiyun Zheng,

Zhiyu Zhou,

Ziqi Liao,

Siyu Wu,

Jian-Yu Jiang-Lin,

Bin Wen,

Hongxia Xie,

Jianlong Fu,

Wen-Huang Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Ruoxuan and Zheng, Qiyun and Zhou, Zhiyu and Liao, Ziqi and Wu, Siyu and Jiang-Lin, Jian-Yu and Wen, Bin and Xie, Hongxia and Fu, Jianlong and Cheng, Wen-Huang},
  title     = {{MindPower}: Enabling Theory-of-Mind Reasoning in {VLM}-based Embodied Agents},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29631--29641},
}
ComPose: A Unified Completion-Pose Framework for Robust Category-Level Object Pose Estimation: Huan Ren,

Yihan Chen,

Chuxin Wang,

Nailong Liu,

Wenfei Yang,

Tianzhu Zhang; [pdf]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Huan and Chen, Yihan and Wang, Chuxin and Liu, Nailong and Yang, Wenfei and Zhang, Tianzhu},
  title     = {{ComPose}: A Unified Completion-Pose Framework for Robust Category-Level Object Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14315--14324},
}
Online Data Curation for Object Detection via Marginal Contributions to Dataset-level Average Precision: Zitang Sun,

Masakazu Yoshimura,

Junji Otsuka,

Atsushi Irie,

Takeshi Ohashi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Zitang and Yoshimura, Masakazu and Otsuka, Junji and Irie, Atsushi and Ohashi, Takeshi},
  title     = {Online Data Curation for Object Detection via Marginal Contributions to Dataset-level Average Precision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32787--32797},
}
LaVR: Scene Latent Conditioned Generative Video Trajectory Re-Rendering using Large 4D Reconstruction Models: Mingyang Xie,

Numair Khan,

Tianfu Wang,

Naina Dhingra,

Seonghyeon Nam,

Haitao Yang,

Zhuo Hui,

Christopher Metzler,

Andrea Vedaldi,

Hamed Pirsiavash,

Lei Luo; [pdf] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Mingyang and Khan, Numair and Wang, Tianfu and Dhingra, Naina and Nam, Seonghyeon and Yang, Haitao and Hui, Zhuo and Metzler, Christopher and Vedaldi, Andrea and Pirsiavash, Hamed and Luo, Lei},
  title     = {{LaVR}: Scene Latent Conditioned Generative Video Trajectory Re-Rendering using Large {4D} Reconstruction Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25450--25460},
}
HierAmp: Coarse-to-Fine Autoregressive Amplification for Generative Dataset Distillation: Lin Zhao,

Xinru Jiang,

Xi Xiao,

Qihui Fan,

Lei Lu,

Yanzhi Wang,

Xue Lin,

Octavia Camps,

Pu Zhao,

Jianyang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Lin and Jiang, Xinru and Xiao, Xi and Fan, Qihui and Lu, Lei and Wang, Yanzhi and Lin, Xue and Camps, Octavia and Zhao, Pu and Gu, Jianyang},
  title     = {{HierAmp}: Coarse-to-Fine Autoregressive Amplification for Generative Dataset Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41688--41698},
}
Leveraging Class Distributions in CLIP for Weakly Supervised Semantic Segmentation: Ziqian Yang,

Xinqiao Zhao,

Xiaolei Wang,

Quan Zhang,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Ziqian and Zhao, Xinqiao and Wang, Xiaolei and Zhang, Quan and Xiao, Jimin},
  title     = {Leveraging Class Distributions in {CLIP} for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34714--34723},
}
Accelerating Diffusion Model Training under Minimal Budgets: A Condensation-Based Perspective: Rui Huang,

Shitong Shao,

Zikai Zhou,

Pukun Zhao,

Hangyu Guo,

Tian Ye,

Lichen Bai,

Shuo Yang,

Zeke Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Rui and Shao, Shitong and Zhou, Zikai and Zhao, Pukun and Guo, Hangyu and Ye, Tian and Bai, Lichen and Yang, Shuo and Xie, Zeke},
  title     = {Accelerating Diffusion Model Training under Minimal Budgets: A Condensation-Based Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43621--43631},
}
Defect Cue-Preserved Structural Feature Refinement for Few-Shot Anomaly Detection: Le Jiang,

Yan Huang,

Zhen Xu,

Yong Xu,

Hau-San Wong,

Si Wu; [pdf]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Le and Huang, Yan and Xu, Zhen and Xu, Yong and Wong, Hau-San and Wu, Si},
  title     = {Defect Cue-Preserved Structural Feature Refinement for Few-Shot Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35607--35616},
}
Robust3DGSW: Toward Robust Watermarking for Quantization-Aware 3D Gaussian Splatting: Boyu Wang,

Jun Xia,

Mingsong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Boyu and Xia, Jun and Chen, Mingsong},
  title     = {{Robust3DGSW}: Toward Robust Watermarking for Quantization-Aware {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19076--19084},
}
RehearseVLA: Simulated Post-Training for VLAs with Physically-Consistent World Model: Junjin Xiao,

Yandan Yang,

Xinyuan Chang,

Ronghan Chen,

Feng Xiong,

Mu Xu,

Wei-Shi Zheng,

Qing Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Junjin and Yang, Yandan and Chang, Xinyuan and Chen, Ronghan and Xiong, Feng and Xu, Mu and Zheng, Wei-Shi and Zhang, Qing},
  title     = {{RehearseVLA}: Simulated Post-Training for {VLAs} with Physically-Consistent World Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20867--20877},
}
Neural Mixture Density Processes: Yi Ding,

Qi Tao,

Xingxing Liang,

Longfei Zhang,

Yiqin Lv,

Weitao Song,

Fangjie Yang,

Cheems Wang,

Guangquan Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Yi and Tao, Qi and Liang, Xingxing and Zhang, Longfei and Lv, Yiqin and Song, Weitao and Yang, Fangjie and Wang, Cheems and Cheng, Guangquan},
  title     = {Neural Mixture Density Processes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39680--39690},
}
Opening the Sim-to-Real Door for Humanoid Pixel-to-Action Policy Transfer: Haoru Xue,

Tairan He,

Zi Wang,

Qingwei Ben,

Wenli Xiao,

Zhengyi Luo,

Xingye Da,

Fernando Castañeda,

Guanya Shi,

Shankar Sastry,

Linxi Fan,

Yuke Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR,
  author    = {Xue, Haoru and He, Tairan and Wang, Zi and Ben, Qingwei and Xiao, Wenli and Luo, Zhengyi and Da, Xingye and Casta{\~n}eda, Fernando and Shi, Guanya and Sastry, Shankar and Fan, Linxi and Zhu, Yuke},
  title     = {Opening the Sim-to-Real Door for Humanoid Pixel-to-Action Policy Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6642--6652}
}
Distributed Image Compression with Multimodal Side Information at Extremely Low Bitrates: Guojun Xu,

Mingyang Zhang,

Jianwen Xiang,

Cheng Tan,

Yanchao Yang,

Junwei Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Guojun and Zhang, Mingyang and Xiang, Jianwen and Tan, Cheng and Yang, Yanchao and Zhou, Junwei},
  title     = {Distributed Image Compression with Multimodal Side Information at Extremely Low Bitrates},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33662--33671}
}
Progressive Guessing to Fixed Point: Rethinking Human Motion Prediction with Deep Equilibrium Models: Dong Wei,

Huaijiang Sun,

Fan Liu,

Yuhui Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Dong and Sun, Huaijiang and Liu, Fan and Zheng, Yuhui},
  title     = {Progressive Guessing to Fixed Point: Rethinking Human Motion Prediction with Deep Equilibrium Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16454--16463}
}
STAGE: Storyboard-Anchored Generation for Cinematic Multi-shot Narrative: Peixuan Zhang,

Zijian Jia,

Kaiqi Liu,

Shuchen Weng,

Si Li,

Boxin Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Peixuan and Jia, Zijian and Liu, Kaiqi and Weng, Shuchen and Li, Si and Shi, Boxin},
  title     = {STAGE: Storyboard-Anchored Generation for Cinematic Multi-shot Narrative},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {659--669}
}
FreqEdit: Preserving High-Frequency Features for Robust Multi-Turn Image Editing: Yucheng Liao,

Jiajun Liang,

Kaiqian Cui,

Baoquan Zhao,

Haoran Xie,

Wei Liu,

Qing Li,

Xudong Mao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Yucheng and Liang, Jiajun and Cui, Kaiqian and Zhao, Baoquan and Xie, Haoran and Liu, Wei and Li, Qing and Mao, Xudong},
  title     = {FreqEdit: Preserving High-Frequency Features for Robust Multi-Turn Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43525--43535}
}
Affordance-First Decomposition for Continual Learning in Video-Language Understanding: Mengzhu Xu,

Hanzhi Liu,

Ningkang Peng,

Qianyu Chen,

Canran Xiao; [pdf] [arXiv]
[bibtex]
@InProceedings{xu_2026_CVPR,
  author    = {Xu, Mengzhu and Liu, Hanzhi and Peng, Ningkang and Chen, Qianyu and Xiao, Canran},
  title     = {Affordance-First Decomposition for Continual Learning in Video-Language Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3908--3919}
}
NEAF: Natural Image Editing with Attention Fusion for Generalizable Test-time Optimization in Text-Guided Image Editing: Jisoo Kim,

Heeseok Oh; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jisoo and Oh, Heeseok},
  title     = {NEAF: Natural Image Editing with Attention Fusion for Generalizable Test-time Optimization in Text-Guided Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22509--22518}
}
How Much 3D Do Video Foundation Models Encode?: Zixuan Huang,

Xiang Li,

Zhaoyang Lv,

James M. Rehg; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zixuan and Li, Xiang and Lv, Zhaoyang and Rehg, James M.},
  title     = {How Much 3D Do Video Foundation Models Encode?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {384--394}
}
VisiLock: Authorizing Instruction-based Image editing with Dual Score Distillation: Van Thanh Le,

Yun Fu; [pdf] [supp]
[bibtex]
@InProceedings{Le_2026_CVPR,
  author    = {Le, Van Thanh and Fu, Yun},
  title     = {VisiLock: Authorizing Instruction-based Image editing with Dual Score Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15710--15718}
}
AdaPrior: Bayesian-Inspired Adaptive Prior Correction for Long-Tailed Continual Learning: S Divakar Bhat,

Amit Popat More,

Mudit Soni,

Bhuvan Aggarwal; [pdf] [supp]
[bibtex]
@InProceedings{Bhat_2026_CVPR,
  author    = {Bhat, S Divakar and More, Amit Popat and Soni, Mudit and Aggarwal, Bhuvan},
  title     = {AdaPrior: Bayesian-Inspired Adaptive Prior Correction for Long-Tailed Continual Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10840--10850}
}
Smart Replay: Adaptive Scheduling of Memory Rehearsal for Computational Resource-Aware Incremental Learning: Jianting Chen,

Dianzhi Yu,

Irwin King; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jianting and Yu, Dianzhi and King, Irwin},
  title     = {Smart Replay: Adaptive Scheduling of Memory Rehearsal for Computational Resource-Aware Incremental Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39945--39961}
}
Curriculum Group Policy Optimization: Adaptive Sampling for Unleashing the Potential of Text-to-Image Generation: Baoteng Li,

Xianghao Zang,

Xinran Wang,

Xiangyu Na,

Zhixiang He,

Hao Sun,

Chi Zhang,

Zhongjiang He,

Tianwei Cao,

Kongming Liang,

Zhanyu Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Baoteng and Zang, Xianghao and Wang, Xinran and Na, Xiangyu and He, Zhixiang and Sun, Hao and Zhang, Chi and He, Zhongjiang and Cao, Tianwei and Liang, Kongming and Ma, Zhanyu},
  title     = {Curriculum Group Policy Optimization: Adaptive Sampling for Unleashing the Potential of Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {561--571}
}
Towards Sparse Video Understanding and Reasoning: Chenwei Xu,

Zhen Ye,

Shang Wu,

Weijian Li,

Zihan Wang,

Zhuofan Xia,

Lie Lu,

Pranav Maneriker,

Fan Du,

Manling Li,

Han Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Chenwei and Ye, Zhen and Wu, Shang and Li, Weijian and Wang, Zihan and Xia, Zhuofan and Lu, Lie and Maneriker, Pranav and Du, Fan and Li, Manling and Liu, Han},
  title     = {Towards Sparse Video Understanding and Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11357--11368}
}
FlowPalm: Optical Flow Driven Non-Rigid Deformation for Geometrically Diverse Palmprint Generation: Yuchen Zou,

Huikai Shao,

Lihuang Fang,

Zhipeng Xiong,

Dexing Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Yuchen and Shao, Huikai and Fang, Lihuang and Xiong, Zhipeng and Zhong, Dexing},
  title     = {FlowPalm: Optical Flow Driven Non-Rigid Deformation for Geometrically Diverse Palmprint Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23591--23600}
}
When Safety Collides: Resolving Multi-Category Harmful Conflicts in Text-to-Image Diffusion via Adaptive Safety Guidance: Yongli Xiang,

Ziming Hong,

Zhaoqing Wang,

Xiangyu Zhao,

Bo Han,

Tongliang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Yongli and Hong, Ziming and Wang, Zhaoqing and Zhao, Xiangyu and Han, Bo and Liu, Tongliang},
  title     = {When Safety Collides: Resolving Multi-Category Harmful Conflicts in Text-to-Image Diffusion via Adaptive Safety Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14746--14755}
}
Layer Consistency Matters: Elegant Latent Transition Discrepancy for Generalizable Synthetic Image Detection: Yawen Yang,

Feng Li,

Shuqi Kong,

Yunfeng Diao,

Xinjian Gao,

Zenglin Shi,

Meng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yawen and Li, Feng and Kong, Shuqi and Diao, Yunfeng and Gao, Xinjian and Shi, Zenglin and Wang, Meng},
  title     = {Layer Consistency Matters: Elegant Latent Transition Discrepancy for Generalizable Synthetic Image Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38111--38121}
}
PointNSP: Autoregressive 3D Point Cloud Generation with Next-Scale Level-of-Detail Prediction: Ziqiao Meng,

Qichao Wang,

Zhiyang Dou,

Zixing Song,

Zhipeng Zhou,

Irwin King,

Peilin Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Meng_2026_CVPR,
  author    = {Meng, Ziqiao and Wang, Qichao and Dou, Zhiyang and Song, Zixing and Zhou, Zhipeng and King, Irwin and Zhao, Peilin},
  title     = {PointNSP: Autoregressive 3D Point Cloud Generation with Next-Scale Level-of-Detail Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31450--31461}
}
Can a Second-View Image Be a Language? Geometric and Semantic Cross-Modal Reasoning for X-ray Prohibited Item Detection: Chuang Peng,

Renshuai Tao,

Zhongwei Ren,

Xianglong Liu,

Yunchao Wei; [pdf] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Chuang and Tao, Renshuai and Ren, Zhongwei and Liu, Xianglong and Wei, Yunchao},
  title     = {Can a Second-View Image Be a Language? Geometric and Semantic Cross-Modal Reasoning for X-ray Prohibited Item Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26176--26186}
}
TIM: Temporal Decoupling with Iterative Mutual-Refinement Model for Longitudinal Radiology Report Generation: Yiheng Dong,

Yi Lin,

Shilong Huang,

Xiyan Yang,

Xin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Yiheng and Lin, Yi and Huang, Shilong and Yang, Xiyan and Yang, Xin},
  title     = {TIM: Temporal Decoupling with Iterative Mutual-Refinement Model for Longitudinal Radiology Report Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6951--6961}
}
Prompt-Free Universal Region Proposal Network: Qihong Tang,

Changhan Liu,

Shaofeng Zhang,

Wenbin Li,

Qi Fan,

Yang Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Qihong and Liu, Changhan and Zhang, Shaofeng and Li, Wenbin and Fan, Qi and Gao, Yang},
  title     = {Prompt-Free Universal Region Proposal Network},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13080--13090}
}
Stabilizing Feature Geometry in Noisy Pretrained Models for Robust Downstream Tasks: Quanyu Zhang,

Zhongyi Han,

Hao Sun,

Yongshun Gong,

Xiaoyan Wang,

Yilong Yin,

Shuo Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Quanyu and Han, Zhongyi and Sun, Hao and Gong, Yongshun and Wang, Xiaoyan and Yin, Yilong and Li, Shuo},
  title     = {Stabilizing Feature Geometry in Noisy Pretrained Models for Robust Downstream Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {789--800}
}
Fast Spatial Tracking with Visual Geometry Transformer: Chengjie Huang,

Guile Wu,

Dongfeng Bai,

Bingbing Liu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Chengjie and Wu, Guile and Bai, Dongfeng and Liu, Bingbing},
  title     = {Fast Spatial Tracking with Visual Geometry Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {374--383}
}
RankOOD - Class Ranking-based Out-of-Distribution Detection: Dishanika Denipitiyage,

Naveen Karunanayake,

Suranga Seneviratne,

Sanjay Chawla; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Denipitiyage_2026_CVPR,
  author    = {Denipitiyage, Dishanika and Karunanayake, Naveen and Seneviratne, Suranga and Chawla, Sanjay},
  title     = {RankOOD - Class Ranking-based Out-of-Distribution Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42280--42289}
}
SASNet: Spatially-Adaptive Sinusoidal Networks for INRs: Haoan Feng,

Diana Aldana,

Tiago Novello,

Leila De Floriani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Haoan and Aldana, Diana and Novello, Tiago and De Floriani, Leila},
  title     = {SASNet: Spatially-Adaptive Sinusoidal Networks for INRs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41964--41973}
}
Imagine Before Concentration: Diffusion-Guided Registers Enhance Partially Relevant Video Retrieval: Jun Li,

Xuhang Lou,

Jinpeng Wang,

Yuting Wang,

Yaowei Wang,

Shu-Tao Xia,

Bin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jun and Lou, Xuhang and Wang, Jinpeng and Wang, Yuting and Wang, Yaowei and Xia, Shu-Tao and Chen, Bin},
  title     = {Imagine Before Concentration: Diffusion-Guided Registers Enhance Partially Relevant Video Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9710--9721}
}
YOLO-Master: MOE-Accelerated with Specialized Transformers for Enhanced Real-time Detection: Xu Lin,

Jinlong Peng,

Zhenye Gan,

Jiawen Zhu,

Jun Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Xu and Peng, Jinlong and Gan, Zhenye and Zhu, Jiawen and Liu, Jun},
  title     = {YOLO-Master: MOE-Accelerated with Specialized Transformers for Enhanced Real-time Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18440--18449}
}
IncreFA: Breaking the Static Wall of Generative Model Attribution: Haotian Qin,

Dongliang Chang,

Yueying Gao,

Yuexuan Tan,

Lei Chen,

Zhanyu Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Haotian and Chang, Dongliang and Gao, Yueying and Tan, Yuexuan and Chen, Lei and Ma, Zhanyu},
  title     = {IncreFA: Breaking the Static Wall of Generative Model Attribution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35405--35415}
}
Rethinking Camera Choice: An Empirical Study on Fisheye Camera Properties in Robotic Manipulation: Han Xue,

Nan Min,

Xiaotong Liu,

Wendi Chen,

Yuan Fang,

Jun Lv,

Cewu Lu,

Chuan Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2026_CVPR,
  author    = {Xue, Han and Min, Nan and Liu, Xiaotong and Chen, Wendi and Fang, Yuan and Lv, Jun and Lu, Cewu and Wen, Chuan},
  title     = {Rethinking Camera Choice: An Empirical Study on Fisheye Camera Properties in Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35059--35069}
}
VideoSeek: Long-Horizon Video Agent with Tool-Guided Seeking: Jingyang Lin,

Jialian Wu,

Jiang Liu,

Ximeng Sun,

Ze Wang,

Xiaodong Yu,

Jiebo Luo,

Zicheng Liu,

Emad Barsoum; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Jingyang and Wu, Jialian and Liu, Jiang and Sun, Ximeng and Wang, Ze and Yu, Xiaodong and Luo, Jiebo and Liu, Zicheng and Barsoum, Emad},
  title     = {VideoSeek: Long-Horizon Video Agent with Tool-Guided Seeking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5465--5475}
}
BAMI: Training-Free Bias Mitigation in GUI Grounding: Borui Zhang,

Bo Zhang,

Bo Wang,

Wenzhao Zheng,

Yuhao Cheng,

Liang Tang,

Yiqiang Yan,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Borui and Zhang, Bo and Wang, Bo and Zheng, Wenzhao and Cheng, Yuhao and Tang, Liang and Yan, Yiqiang and Zhou, Jie and Lu, Jiwen},
  title     = {BAMI: Training-Free Bias Mitigation in GUI Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34596--34605}
}
Cinematic Audio Source Separation Using Visual Cues: Kang Zhang,

Suyeon Lee,

Arda Senocak,

Joon Son Chung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Kang and Lee, Suyeon and Senocak, Arda and Chung, Joon Son},
  title     = {Cinematic Audio Source Separation Using Visual Cues},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37874--37884}
}
GrOCE: Graph-Guided Online Concept Erasure for Text-to-Image Diffusion Models: Ning Han,

Zhenyu Ge,

Feng Han,

Yuhua Sun,

Chengqing Li,

Jingjing Chen; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Ning and Ge, Zhenyu and Han, Feng and Sun, Yuhua and Li, Chengqing and Chen, Jingjing},
  title     = {GrOCE: Graph-Guided Online Concept Erasure for Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43536--43545}
}
PointAlign: Feature-Level Alignment Regularization for 3D Vision-Language Models: Yuanhao Su,

Shaofeng Zhang,

Xiaosong Jia,

Qi Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2026_CVPR,
  author    = {Su, Yuanhao and Zhang, Shaofeng and Jia, Xiaosong and Fan, Qi},
  title     = {PointAlign: Feature-Level Alignment Regularization for 3D Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22100--22110}
}
NeAR: Coupled Neural Asset-Renderer Stack: Hong Li,

Chongjie Ye,

Houyuan Chen,

Weiqing Xiao,

Ziyang Yan,

Lixing Xiao,

Zhaoxi Chen,

Jianfeng Xiang,

Shaocong Xu,

Xuhui Liu,

Yikai Wang,

Baochang Zhang,

Xiaoguang Han,

Jiaolong Yang,

Hao Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Hong and Ye, Chongjie and Chen, Houyuan and Xiao, Weiqing and Yan, Ziyang and Xiao, Lixing and Chen, Zhaoxi and Xiang, Jianfeng and Xu, Shaocong and Liu, Xuhui and Wang, Yikai and Zhang, Baochang and Han, Xiaoguang and Yang, Jiaolong and Zhao, Hao},
  title     = {NeAR: Coupled Neural Asset-Renderer Stack},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29834--29844}
}
MOON2.0: Dynamic Modality-balanced Multimodal Representation Learning for E-commerce Product Understanding: Zhanheng Nie,

Chenghan Fu,

Daoze Zhang,

Junxian Wu,

Wanxian Guan,

Pengjie Wang,

Jian Xu,

Bo Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Nie_2026_CVPR,
  author    = {Nie, Zhanheng and Fu, Chenghan and Zhang, Daoze and Wu, Junxian and Guan, Wanxian and Wang, Pengjie and Xu, Jian and Zheng, Bo},
  title     = {MOON2.0: Dynamic Modality-balanced Multimodal Representation Learning for E-commerce Product Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22975--22985}
}
ProgressiveAvatars: Progressive Animatable 3D Gaussian Avatars: Kaiwen Song,

Jinkai Cui,

Juyong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Kaiwen and Cui, Jinkai and Zhang, Juyong},
  title     = {ProgressiveAvatars: Progressive Animatable 3D Gaussian Avatars},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32518--32527}
}
ApET: Approximation-Error Guided Token Compression for Efficient VLMs: Qiankun Ma,

Ziyao Zhang,

Haofei Wang,

Zhen Song,

Jie Chen,

Hairong Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Qiankun and Zhang, Ziyao and Wang, Haofei and Song, Zhen and Chen, Jie and Zheng, Hairong},
  title     = {ApET: Approximation-Error Guided Token Compression for Efficient VLMs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26306--26316}
}
Scene Grounding in the Wild: Tamir Cohen,

Leo Segre,

Shay Shomer-Chai,

Shai Avidan,

Hadar Averbuch-Elor; [pdf] [supp]
[bibtex]
@InProceedings{Cohen_2026_CVPR,
  author    = {Cohen, Tamir and Segre, Leo and Shomer-Chai, Shay and Avidan, Shai and Averbuch-Elor, Hadar},
  title     = {Scene Grounding in the Wild},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33353--33363}
}
Polyphony: Diffusion-based Dual-Hand Action Segmentation with Alternating Vision Transformer and Semantic Conditioning: Hao Zheng,

Hu Wang,

Tiantian Zheng,

Prajjwal Bhattarai,

Tuka Alhanai; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Hao and Wang, Hu and Zheng, Tiantian and Bhattarai, Prajjwal and Alhanai, Tuka},
  title     = {Polyphony: Diffusion-based Dual-Hand Action Segmentation with Alternating Vision Transformer and Semantic Conditioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20098--20107}
}
EgoFlow: Gradient-Guided Flow Matching for Egocentric 6DoF Object Motion Generation: Abhishek Saroha,

Huajian Zeng,

Xingxing Zuo,

Daniel Cremers,

Xi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saroha_2026_CVPR,
  author    = {Saroha, Abhishek and Zeng, Huajian and Zuo, Xingxing and Cremers, Daniel and Wang, Xi},
  title     = {EgoFlow: Gradient-Guided Flow Matching for Egocentric 6DoF Object Motion Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4332--4342}
}
Learning to See through Illumination Extremes with Event Streaming in Multimodal Large Language Models: Baoheng Zhang,

Jiahui Liu,

Gui Zhao,

Weizhou Zhang,

Yixuan Ma,

Jun Jiang,

Yingxian Chen,

Wilton W.T. Fok,

Xiaojuan Qi,

Hayden Kwok-Hay So; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Baoheng and Liu, Jiahui and Zhao, Gui and Zhang, Weizhou and Ma, Yixuan and Jiang, Jun and Chen, Yingxian and Fok, Wilton W. T. and Qi, Xiaojuan and So, Hayden Kwok-Hay},
  title     = {Learning to See through Illumination Extremes with Event Streaming in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26198--26208}
}
All Roads Lead to Rome: Incentivizing Divergent Thinking in Vision-Language Models: Xinyu Tian,

Shu Zou,

Zhaoyuan Yang,

Mengqi He,

Peter Tu,

Jing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2026_CVPR,
  author    = {Tian, Xinyu and Zou, Shu and Yang, Zhaoyuan and He, Mengqi and Tu, Peter and Zhang, Jing},
  title     = {All Roads Lead to Rome: Incentivizing Divergent Thinking in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33488--33498}
}
MCHDoc: A Comprehensive Benchmark for Reading Multi-Carrier Chinese Historical Documents: Yijun Sheng,

Shipeng Zhu,

Ruijia Zuo,

Na Nie,

Hui Xue; [pdf] [supp]
[bibtex]
@InProceedings{Sheng_2026_CVPR,
  author    = {Sheng, Yijun and Zhu, Shipeng and Zuo, Ruijia and Nie, Na and Xue, Hui},
  title     = {MCHDoc: A Comprehensive Benchmark for Reading Multi-Carrier Chinese Historical Documents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38722--38731}
}
GM-R^2: Generative Matching Learning for Unsupervised Geometric Representation and Registration: Haobo Jiang,

Liang Yu,

Jianmin Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Haobo and Yu, Liang and Zheng, Jianmin},
  title     = {{GM-R$^2$}: Generative Matching Learning for Unsupervised Geometric Representation and Registration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31430--31439}
}
Low-Rank Residual Diffusion Models: Junfu Tan,

Jiang Yuan; [pdf]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Junfu and Yuan, Jiang},
  title     = {Low-Rank Residual Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35747--35757}
}
VA-π: Variational Policy Alignment for Pixel-Aware Autoregressive Generation: Xinyao Liao,

Qiyuan He,

Kai Xu,

Xiaoye Qu,

Yicong Li,

Wei Wei,

Angela Yao; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Xinyao and He, Qiyuan and Xu, Kai and Qu, Xiaoye and Li, Yicong and Wei, Wei and Yao, Angela},
  title     = {{VA-$\pi$}: Variational Policy Alignment for Pixel-Aware Autoregressive Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12787--12797}
}
ORSATR-X: A Foundation Model based on Differential-and-Excitation Networks for Optical Remote Sensing Object Recognition: Canyu Mo,

Yongxiang Liu,

Jiehua Zhang,

Zilong Yu,

Zhen Liu,

Tianpeng Liu,

Li Liu; [pdf] [supp]
[bibtex]
@InProceedings{Mo_2026_CVPR,
  author    = {Mo, Canyu and Liu, Yongxiang and Zhang, Jiehua and Yu, Zilong and Liu, Zhen and Liu, Tianpeng and Liu, Li},
  title     = {ORSATR-X: A Foundation Model based on Differential-and-Excitation Networks for Optical Remote Sensing Object Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27849--27860}
}
GGBench: A Geometric Generative Reasoning Benchmark for Unified Multimodal Models: Jingxuan Wei,

Caijun Jia,

Xi Bai,

Xinglong Xu,

Siyuan Li,

Linzhuang Sun,

Bihui Yu,

Conghui He,

Lijun Wu,

Cheng Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Jingxuan and Jia, Caijun and Bai, Xi and Xu, Xinglong and Li, Siyuan and Sun, Linzhuang and Yu, Bihui and He, Conghui and Wu, Lijun and Tan, Cheng},
  title     = {GGBench: A Geometric Generative Reasoning Benchmark for Unified Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5199--5210}
}
NeuroRule: Bridging Vision and Logic with Differentiable Rule Induction: Muhammad Zarar,

Mingzheng Zhang,

Xiaowang Zhang,

Zhiyong Feng; [pdf] [supp]
[bibtex]
@InProceedings{Zarar_2026_CVPR,
  author    = {Zarar, Muhammad and Zhang, Mingzheng and Zhang, Xiaowang and Feng, Zhiyong},
  title     = {NeuroRule: Bridging Vision and Logic with Differentiable Rule Induction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11654--11663}
}
Learning to Act Robustly with View-Invariant Latent Actions: Youngjoon Jeong,

Junha Chun,

Taesup Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Youngjoon and Chun, Junha and Kim, Taesup},
  title     = {Learning to Act Robustly with View-Invariant Latent Actions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6781--6790}
}
Cross-View Splatter: Feed-Forward View Synthesis with Georeferenced Images: Matias Turkulainen,

Akshay Krishnan,

Filippo Aleotti,

Mohamed Sayed,

Guillermo Garcia-Hernando,

Juho Kannala,

Arno Solin,

Gabriel Brostow,

Daniyar Turmukhambetov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Turkulainen_2026_CVPR,
  author    = {Turkulainen, Matias and Krishnan, Akshay and Aleotti, Filippo and Sayed, Mohamed and Garcia-Hernando, Guillermo and Kannala, Juho and Solin, Arno and Brostow, Gabriel and Turmukhambetov, Daniyar},
  title     = {Cross-View Splatter: Feed-Forward View Synthesis with Georeferenced Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40959--40970}
}
Beyond Single Solution: Multi-Hypothesis Deep Unfolding Network for Image Compressive Sensing: Wenxue Cui,

Hualin Li,

Yuhang Qin,

Yifu Xu,

Xiaopeng Fan,

Debin Zhao; [pdf]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Wenxue and Li, Hualin and Qin, Yuhang and Xu, Yifu and Fan, Xiaopeng and Zhao, Debin},
  title     = {Beyond Single Solution: Multi-Hypothesis Deep Unfolding Network for Image Compressive Sensing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5283--5293}
}
Chain of Event-Centric Causal Thought for Physically Plausible Video Generation: Zixuan Wang,

Yixin Hu,

Haolan Wang,

Feng Chen,

Yan Liu,

Wen Li,

Yinjie Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zixuan and Hu, Yixin and Wang, Haolan and Chen, Feng and Liu, Yan and Li, Wen and Lei, Yinjie},
  title     = {Chain of Event-Centric Causal Thought for Physically Plausible Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38122--38131}
}
SunFaded: Illumination-Aware Gaussian Splatting for Dark Scenes with Camera-Mounted Active Lighting: Wenjie Chang,

Tianle Ding,

Wenfei Yang,

Tianzhu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2026_CVPR,
  author    = {Chang, Wenjie and Ding, Tianle and Yang, Wenfei and Zhang, Tianzhu},
  title     = {SunFaded: Illumination-Aware Gaussian Splatting for Dark Scenes with Camera-Mounted Active Lighting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40876--40885}
}
MM-SeR: Multimodal Self-Refinement for Lightweight Image Captioning: Junha Song,

Yongsik Jo,

So Yeon Min,

Quanting Xie,

Taehwan Kim,

Yonatan Bisk,

Jaegul Choo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Junha and Jo, Yongsik and Min, So Yeon and Xie, Quanting and Kim, Taehwan and Bisk, Yonatan and Choo, Jaegul},
  title     = {MM-SeR: Multimodal Self-Refinement for Lightweight Image Captioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30929--30940}
}
Thermally Activated Dual-Modal Adversarial Clothing against AI Surveillance Systems: Jiahuan Long,

Tingsong Jiang,

Hanqing Liu,

Chao Ma,

Weien Zhou,

Yang Yang,

Wen Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Long_2026_CVPR,
  author    = {Long, Jiahuan and Jiang, Tingsong and Liu, Hanqing and Ma, Chao and Zhou, Weien and Yang, Yang and Yao, Wen},
  title     = {Thermally Activated Dual-Modal Adversarial Clothing against {AI} Surveillance Systems},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34929--34939},
}
Zero-shot Detection of AI-Generated Image via RAW-RGB Alignment: Haiwei Wu,

Fengpeng Li,

Zhilin Tu,

Yuanman Li,

Xiong Li,

Jiantao Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Haiwei and Li, Fengpeng and Tu, Zhilin and Li, Yuanman and Li, Xiong and Zhou, Jiantao},
  title     = {Zero-shot Detection of {AI}-Generated Image via {RAW-RGB} Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42997--43007},
}
Hunting Normality from Query Sample via Residual Learning for Generalist Anomaly Detection: Xiaolei Wang,

Yuexin Wang,

Tianhong Dai,

Huihui Bai,

Yao Zhao,

Jimin Xiao; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xiaolei and Wang, Yuexin and Dai, Tianhong and Bai, Huihui and Zhao, Yao and Xiao, Jimin},
  title     = {Hunting Normality from Query Sample via Residual Learning for Generalist Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43093--43102},
}
TacSIm: A Dataset and Benchmark for Football Tactical Style Imitation: Peng Wen,

Yuting Wang,

Qiurui Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Peng and Wang, Yuting and Wang, Qiurui},
  title     = {{TacSIm}: A Dataset and Benchmark for Football Tactical Style Imitation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20014--20023},
}
See Further, Think Deeper: Advancing VLM's Reasoning Ability with Low-level Visual Cues and Reflection: Zhiheng Wu,

Tong Wang,

Shuning Wang,

Naiming Liu,

Yumeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zhiheng and Wang, Tong and Wang, Shuning and Liu, Naiming and Zhang, Yumeng},
  title     = {See Further, Think Deeper: Advancing {VLM}'s Reasoning Ability with Low-level Visual Cues and Reflection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18870--18880},
}
OccAny: Generalized Unconstrained Urban 3D Occupancy: Anh-Quan Cao,

Tuan-Hung Vu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Anh-Quan and Vu, Tuan-Hung},
  title     = {{OccAny}: Generalized Unconstrained Urban {3D} Occupancy},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28599--28609},
}
MedKCO: Medical Vision-Language Pretraining via Knowledge-Driven Cognitive Orchestration: Chenran Zhang,

Ruiqi Wu,

Tao Zhou,

Yi Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chenran and Wu, Ruiqi and Zhou, Tao and Zhou, Yi},
  title     = {{MedKCO}: Medical Vision-Language Pretraining via Knowledge-Driven Cognitive Orchestration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35260--35269},
}
MOGeo: Beyond One-to-One Cross-View Object Geo-localization: Bo Lv,

Qingwang Zhang,

Le Wu,

Yuanyuan Li,

Yingying Zhu; [pdf] [arXiv]
[bibtex]
@InProceedings{Lv_2026_CVPR,
  author    = {Lv, Bo and Zhang, Qingwang and Wu, Le and Li, Yuanyuan and Zhu, Yingying},
  title     = {{MOGeo}: Beyond One-to-One Cross-View Object Geo-localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26422--26431},
}
Thermal Diffusion Matters: Infrared Spatial-Temporal Video Super-Resolution through Heat Conduction Priors: Mingxuan Zhou,

Shuang Li,

Yutang Zhang,

Jing Geng,

Yirui Shen,

Jingxuan Kang,

Fuzhen Zhuang,

Shuigen Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Mingxuan and Li, Shuang and Zhang, Yutang and Geng, Jing and Shen, Yirui and Kang, Jingxuan and Zhuang, Fuzhen and Wang, Shuigen},
  title     = {Thermal Diffusion Matters: Infrared Spatial-Temporal Video Super-Resolution through Heat Conduction Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2146--2155},
}
BIT: Matching-based Bi-directional Interaction Transformation Network for Visible-Infrared Person Re-Identification: Haoxuan Xu,

Guanglin Niu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Haoxuan and Niu, Guanglin},
  title     = {{BIT}: Matching-based Bi-directional Interaction Transformation Network for Visible-Infrared Person Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40386--40396},
}
TeamHOI: Learning a Unified Policy for Cooperative Human-Object Interactions with Any Team Size: Stefan Lionar,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lionar_2026_CVPR,
  author    = {Lionar, Stefan and Lee, Gim Hee},
  title     = {{TeamHOI}: Learning a Unified Policy for Cooperative Human-Object Interactions with Any Team Size},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37121--37132},
}
Regulating Rather than Constraining: Adaptive Guidance for Complex Spectral Reconstruction in Pansharpening: Zhuwei Wen,

Zimin Xia,

He Chen,

Linwei Yue,

Xianwei Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Zhuwei and Xia, Zimin and Chen, He and Yue, Linwei and Zheng, Xianwei},
  title     = {Regulating Rather than Constraining: Adaptive Guidance for Complex Spectral Reconstruction in Pansharpening},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34832--34842},
}
Diffusion-Based Makeup Transfer with Facial Region-Aware Makeup Features: Zheng Gao,

Debin Meng,

Yunqi Miao,

Zhensong Zhang,

Songcen Xu,

Ioannis Patras,

Jifei Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Zheng and Meng, Debin and Miao, Yunqi and Zhang, Zhensong and Xu, Songcen and Patras, Ioannis and Song, Jifei},
  title     = {Diffusion-Based Makeup Transfer with Facial Region-Aware Makeup Features},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4656--4666},
}
SAQN: Semantic-based Adaptive Query Network for 3D Referring Expression Segmentation: Jiale Huang,

Shangfei Wang; [pdf]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Jiale and Wang, Shangfei},
  title     = {{SAQN}: Semantic-based Adaptive Query Network for {3D} Referring Expression Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38627--38636},
}
Beyond 3D VQAs: Injecting 3D Spatial Priors into Vision-Language Models for Enhanced Geometric Reasoning: Chun-Hsiao Yeh,

Shengyi Qian,

Manchen Wang,

Yi Ma,

Joseph Tighe,

Fanyi Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Yeh_2026_CVPR,
  author    = {Yeh, Chun-Hsiao and Qian, Shengyi and Wang, Manchen and Ma, Yi and Tighe, Joseph and Xiao, Fanyi},
  title     = {Beyond {3D} {VQAs}: Injecting {3D} Spatial Priors into Vision-Language Models for Enhanced Geometric Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16723--16733},
}
Pressure2Motion: Hierarchical Human Motion Reconstruction from Ground Pressure with Text Guidance: Zhengxuan Li,

Qinhui Yang,

Yiyu Zhuang,

Chuan Guo,

Xinxin Zuo,

Xiaoxiao Long,

Yao Yao,

Xun Cao,

Qiu Shen,

Hao Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhengxuan and Yang, Qinhui and Zhuang, Yiyu and Guo, Chuan and Zuo, Xinxin and Long, Xiaoxiao and Yao, Yao and Cao, Xun and Shen, Qiu and Zhu, Hao},
  title     = {{Pressure2Motion}: Hierarchical Human Motion Reconstruction from Ground Pressure with Text Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23495--23505},
}
Making the Classification Explanation Faithful to the Confidence Score: Jian-Xun Mi,

Lu Pan,

Weisheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Mi_2026_CVPR,
  author    = {Mi, Jian-Xun and Pan, Lu and Li, Weisheng},
  title     = {Making the Classification Explanation Faithful to the Confidence Score},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38959--38968},
}
Selective Amnesia using Contrastive Subnet Erasure for Class Level Unlearning in Vision Models: Vishal Pramanik,

Maisha Maliha,

Susmit Jha,

Alvaro Velasquez,

Olivera Kotevska,

Sumit Kumar Jha; [pdf] [supp]
[bibtex]
@InProceedings{Pramanik_2026_CVPR,
  author    = {Pramanik, Vishal and Maliha, Maisha and Jha, Susmit and Velasquez, Alvaro and Kotevska, Olivera and Jha, Sumit Kumar},
  title     = {Selective Amnesia using Contrastive Subnet Erasure for Class Level Unlearning in Vision Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31662--31671},
}
VT-Intrinsic: Physics-Based Decomposition of Reflectance and Shading using a Single Visible-Thermal Image Pair: Zeqing Yuan,

Mani Ramanagopal,

Aswin C. Sankaranarayanan,

Srinivasa G. Narasimhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Zeqing and Ramanagopal, Mani and Sankaranarayanan, Aswin C. and Narasimhan, Srinivasa G.},
  title     = {{VT-Intrinsic}: Physics-Based Decomposition of Reflectance and Shading using a Single Visible-Thermal Image Pair},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41752--41761},
}
Exemplar-Free Continual Learning for State Space Models: Isaac Ning Lee,

Leila Mahmoodi,

Trung Le,

Mehrtash Harandi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Isaac Ning and Mahmoodi, Leila and Le, Trung and Harandi, Mehrtash},
  title     = {Exemplar-Free Continual Learning for State Space Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25077--25087},
}
S2C2Seg: Semantic-Spatial Consistency and Category Optimization for Open-Vocabulary Segmentation: Yuhao Qing,

Yueying Wang,

Chaoyang Chen,

Weidong Zhang,

Jie Wen,

Xin Xu; [pdf]
[bibtex]
@InProceedings{Qing_2026_CVPR,
  author    = {Qing, Yuhao and Wang, Yueying and Chen, Chaoyang and Zhang, Weidong and Wen, Jie and Xu, Xin},
  title     = {{S2C2Seg}: Semantic-Spatial Consistency and Category Optimization for Open-Vocabulary Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6293--6303},
}
AdaDexTrack: Dynamic Modulation for Adaptive and Generalizable Dexterous Manipulation Tracking: Jianibieke Adalibieke,

Qianwei Han,

Xueyi Liu,

Yuzhe Qin,

Li Yi; [pdf] [supp]
[bibtex]
@InProceedings{Adalibieke_2026_CVPR,
  author    = {Adalibieke, Jianibieke and Han, Qianwei and Liu, Xueyi and Qin, Yuzhe and Yi, Li},
  title     = {{AdaDexTrack}: Dynamic Modulation for Adaptive and Generalizable Dexterous Manipulation Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28021--28031},
}
Do You Have Freestyle? Expressive Humanoid Locomotion via Audio Control: Zhe Li,

Cheng Chi,

Yangyang Wei,

Boan Zhu,

Tao Huang,

Zhenguo Sun,

Yibo Peng,

Pengwei Wang,

Zhongyuan Wang,

Fangzhou Liu,

Chang Xu,

Shanghang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhe and Chi, Cheng and Wei, Yangyang and Zhu, Boan and Huang, Tao and Sun, Zhenguo and Peng, Yibo and Wang, Pengwei and Wang, Zhongyuan and Liu, Fangzhou and Xu, Chang and Zhang, Shanghang},
  title     = {Do You Have Freestyle? {Expressive} Humanoid Locomotion via Audio Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {956--965},
}
LightRR: A Lightweight Network for Single Image Reflection Removal: Wenbin Yin,

Junkang Zhang,

Sunzhe Yang,

Faming Fang,

Guixu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Wenbin and Zhang, Junkang and Yang, Sunzhe and Fang, Faming and Zhang, Guixu},
  title     = {{LightRR}: A Lightweight Network for Single Image Reflection Removal},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19706--19715},
}
Thinking in Dynamics: How Multimodal Large Language Models Perceive, Track, and Reason Dynamics in Physical 4D World: Yuzhi Huang,

Kairun Wen,

Rongxin Gao,

Dongxuan Liu,

Yibin Lou,

Jie Wu,

Jing Xu,

Jian Zhang,

Zheng Yang,

Yunlong Lin,

Chenxin Li,

Panwang Pan,

Junbin Lu,

Jingyan Jiang,

Xinghao Ding,

Yue Huang,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Yuzhi and Wen, Kairun and Gao, Rongxin and Liu, Dongxuan and Lou, Yibin and Wu, Jie and Xu, Jing and Zhang, Jian and Yang, Zheng and Lin, Yunlong and Li, Chenxin and Pan, Panwang and Lu, Junbin and Jiang, Jingyan and Ding, Xinghao and Huang, Yue and Wang, Zhi},
  title     = {Thinking in Dynamics: How Multimodal Large Language Models Perceive, Track, and Reason Dynamics in Physical {4D} World},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33446--33456},
}
3DReflecNet: A Large-Scale Dataset for 3D Reconstruction of Reflective, Transparent, and Low-Texture Objects: Zhicheng Liang,

Haoyi Yu,

Boyan Li,

Dayou Zhang,

Zijian Cao,

Tianyi Gong,

Junhua Liu,

Shuguang Cui,

Fangxin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Zhicheng and Yu, Haoyi and Li, Boyan and Zhang, Dayou and Cao, Zijian and Gong, Tianyi and Liu, Junhua and Cui, Shuguang and Wang, Fangxin},
  title     = {{3DReflecNet}: A Large-Scale Dataset for {3D} Reconstruction of Reflective, Transparent, and Low-Texture Objects},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7244--7255},
}
PHAC: Promptable Human Amodal Completion: Seung Young Noh,

Ju Yong Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Noh_2026_CVPR,
  author    = {Noh, Seung Young and Chang, Ju Yong},
  title     = {{PHAC}: Promptable Human Amodal Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30750--30760},
}
Spherical Leech Quantization for Visual Tokenization and Generation: Yue Zhao,

Hanwen Jiang,

Zhenlin Xu,

Chutong Yang,

Ehsan Adeli,

Philipp Kraehenbuehl; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yue and Jiang, Hanwen and Xu, Zhenlin and Yang, Chutong and Adeli, Ehsan and Kraehenbuehl, Philipp},
  title     = {Spherical {Leech} Quantization for Visual Tokenization and Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12913--12923},
}
Outlier-Robust Diffusion Solvers for Inverse Problems: Yang Zheng,

Jiahua Liu,

Tongyao Pang,

Wen Li,

Zhaoqiang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Yang and Liu, Jiahua and Pang, Tongyao and Li, Wen and Liu, Zhaoqiang},
  title     = {Outlier-Robust Diffusion Solvers for Inverse Problems},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30782--30791},
}
C^2FG: Control Classifier-Free Guidance via Score Discrepancy Analysis: Jiayang Gao,

Tianyi Zheng,

Jiayang Zou,

Fengxiang Yang,

Shice Liu,

Luyao Fan,

Zheyu Zhang,

Hao Zhang,

Jinwei Chen,

Peng-Tao Jiang,

Bo Li,

Jia Wang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Jiayang and Zheng, Tianyi and Zou, Jiayang and Yang, Fengxiang and Liu, Shice and Fan, Luyao and Zhang, Zheyu and Zhang, Hao and Chen, Jinwei and Jiang, Peng-Tao and Li, Bo and Wang, Jia},
  title     = {{C{\textasciicircum}2FG}: Control Classifier-Free Guidance via Score Discrepancy Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34398--34407},
}
MetaSpectra+: A Compact Broadband Metasurface Camera for Snapshot Hyperspectral+ Imaging: Yuxuan Liu,

Wei Xu,

Qi Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuxuan and Xu, Wei and Guo, Qi},
  title     = {{MetaSpectra+}: A Compact Broadband Metasurface Camera for Snapshot Hyperspectral+ Imaging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {209--219},
}
Thermal is Always Wild: Characterizing and Addressing Challenges in Thermal-Only Novel View Synthesis: M. Kerem Aydin,

Vishwanath Saragadam,

Emma Alexander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Aydin_2026_CVPR,
  author    = {Aydin, M. Kerem and Saragadam, Vishwanath and Alexander, Emma},
  title     = {Thermal is Always Wild: Characterizing and Addressing Challenges in Thermal-Only Novel View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29845--29854},
}
RHO: Robust Holistic OSM-Based Metric Cross-View Geo-Localization: Junwei Zheng,

Ruize Dai,

Ruiping Liu,

Zichao Zeng,

Yufan Chen,

Fangjinhua Wang,

Kunyu Peng,

Kailun Yang,

Jiaming Zhang,

Rainer Stiefelhagen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Junwei and Dai, Ruize and Liu, Ruiping and Zeng, Zichao and Chen, Yufan and Wang, Fangjinhua and Peng, Kunyu and Yang, Kailun and Zhang, Jiaming and Stiefelhagen, Rainer},
  title     = {{RHO}: Robust Holistic {OSM}-Based Metric Cross-View Geo-Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33727--33737},
}
OASIS: On-Demand Hierarchical Event Memory for Streaming Video Reasoning: Zhijia Liang,

Jiaming Li,

Weikai Chen,

Yanhao Zhang,

Haonan Lu,

Guanbin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Zhijia and Li, Jiaming and Chen, Weikai and Zhang, Yanhao and Lu, Haonan and Li, Guanbin},
  title     = {{OASIS}: On-Demand Hierarchical Event Memory for Streaming Video Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2821--2831},
}
Beyond What's Shared: Recovering Lost Unique Information from Intermediate Layers to Boost Multimodal Geo-Foundation Models: JangHyeon Lee,

Philipe Ambrozio Dias,

Yao-Yi Chiang,

Dalton Lunga; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, JangHyeon and Dias, Philipe Ambrozio and Chiang, Yao-Yi and Lunga, Dalton},
  title     = {Beyond What's Shared: Recovering Lost Unique Information from Intermediate Layers to Boost Multimodal Geo-Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1585--1595},
}
SceneMaker: Open-set 3D Scene Generation with Decoupled De-occlusion and Pose Estimation Model: Yukai Shi,

Weiyu Li,

Zihao Wang,

Hongyang Li,

Xingyu Chen,

Ping Tan,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Yukai and Li, Weiyu and Wang, Zihao and Li, Hongyang and Chen, Xingyu and Tan, Ping and Zhang, Lei},
  title     = {{SceneMaker}: Open-set {3D} Scene Generation with Decoupled De-occlusion and Pose Estimation Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27146--27156},
}
VAD-GS: Visibility-Aware Densification for 3D Gaussian Splatting in Dynamic Urban Scenes: Yikang Zhang,

Rui Fan; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yikang and Fan, Rui},
  title     = {{VAD-GS}: Visibility-Aware Densification for {3D} {Gaussian} Splatting in Dynamic Urban Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4953--4962},
}
Uncertainty-guided Compositional Alignment with Part-to-Whole Semantic Representativeness in Hyperbolic Vision-Language Models: Hayeon Kim,

Ji Ha Jang,

Junghun James Kim,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Hayeon and Jang, Ji Ha and Kim, Junghun James and Chun, Se Young},
  title     = {Uncertainty-guided Compositional Alignment with Part-to-Whole Semantic Representativeness in Hyperbolic Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36861--36870},
}
Self-Critical Distillation Network for Video-based Commonsense Captioning: Mengqi Yuan,

Gengyun Jia,

Bing-Kun Bao; [pdf]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Mengqi and Jia, Gengyun and Bao, Bing-Kun},
  title     = {Self-Critical Distillation Network for Video-based Commonsense Captioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40527--40536},
}
Block-Sparse Global Attention for Efficient Multi-View Geometry Transformers: Chung-Shien Brian Wang,

Christian Schmidt,

Jens Piekenbrinck,

Bastian Leibe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chung-Shien Brian and Schmidt, Christian and Piekenbrinck, Jens and Leibe, Bastian},
  title     = {Block-Sparse Global Attention for Efficient Multi-View Geometry Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14546--14555},
}
TANGO: Learning Distribution-wise Foundation Prior Consistency and Instance-wise Style Calibration for Medical Image Generalization: Chuang Liu,

Yichao Cao,

Xiu Su,

Haogang Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Chuang and Cao, Yichao and Su, Xiu and Zhu, Haogang},
  title     = {{TANGO}: Learning Distribution-wise Foundation Prior Consistency and Instance-wise Style Calibration for Medical Image Generalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8545--8555},
}
A Provable Energy-Guided Test-Time Defense Boosting Adversarial Robustness of Large Vision-Language Models: Mujtaba Hussain Mirza,

Antonio D'Orazio,

Odelia Melamed,

Iacopo Masi; [pdf] [supp]
[bibtex]
@InProceedings{Mirza_2026_CVPR,
  author    = {Mirza, Mujtaba Hussain and D'Orazio, Antonio and Melamed, Odelia and Masi, Iacopo},
  title     = {A Provable Energy-Guided Test-Time Defense Boosting Adversarial Robustness of Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8598--8609},
}
TF-CADE: Foreground-Concentrated Text-Video Alignment for Zero-Shot Temporal Action Detection: Yearang Lee,

Ho-Joong Kim,

Seong-Whan Lee; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Yearang and Kim, Ho-Joong and Lee, Seong-Whan},
  title     = {{TF-CADE}: Foreground-Concentrated Text-Video Alignment for Zero-Shot Temporal Action Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2843--2852},
}
Inter-Edit: First Benchmark for Interactive Instruction-Based Image Editing: Delong Liu,

Haotian Hou,

Zhaohui Hou,

Zhiyuan Huang,

Shihao Han,

Mingjie Zhan,

Zhicheng Zhao,

Fei Su; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Delong and Hou, Haotian and Hou, Zhaohui and Huang, Zhiyuan and Han, Shihao and Zhan, Mingjie and Zhao, Zhicheng and Su, Fei},
  title     = {{Inter-Edit}: First Benchmark for Interactive Instruction-Based Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37290--37300},
}
Emergent Outlier View Rejection in Visual Geometry Grounded Transformers: Jisang Han,

Sunghwan Hong,

Jaewoo Jung,

Wooseok Jang,

Honggyu An,

Qianqian Wang,

Seungryong Kim,

Chen Feng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Jisang and Hong, Sunghwan and Jung, Jaewoo and Jang, Wooseok and An, Honggyu and Wang, Qianqian and Kim, Seungryong and Feng, Chen},
  title     = {Emergent Outlier View Rejection in Visual Geometry Grounded Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {427--437},
}
TiViBench: Benchmarking Think-in-Video Reasoning for Video Generation: Harold Haodong Chen,

Disen Lan,

Wen-Jie Shu,

Qingyang Liu,

Zihan Wang,

Sirui Chen,

Wenkai Cheng,

Kanghao Chen,

Hongfei Zhang,

Zixin Zhang,

Rongjin Guo,

Yu Cheng,

Ying-Cong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Harold Haodong and Lan, Disen and Shu, Wen-Jie and Liu, Qingyang and Wang, Zihan and Chen, Sirui and Cheng, Wenkai and Chen, Kanghao and Zhang, Hongfei and Zhang, Zixin and Guo, Rongjin and Cheng, Yu and Chen, Ying-Cong},
  title     = {{TiViBench}: Benchmarking Think-in-Video Reasoning for Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11403--11413},
}
AMap: Distilling Future Priors for Ahead-Aware Online HD Map Construction: Ruikai Li,

Xinrun Li,

Mengwei Xie,

Hao Shan,

Shoumeng Qiu,

Xinyuan Chang,

Yizhe Fan,

Feng Xiong,

Han Jiang,

Yilong Ren,

Haiyang Yu,

Mu Xu,

Yang Long,

Varun Ojha,

Zhiyong Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Ruikai and Li, Xinrun and Xie, Mengwei and Shan, Hao and Qiu, Shoumeng and Chang, Xinyuan and Fan, Yizhe and Xiong, Feng and Jiang, Han and Ren, Yilong and Yu, Haiyang and Xu, Mu and Long, Yang and Ojha, Varun and Cui, Zhiyong},
  title     = {{AMap}: Distilling Future Priors for Ahead-Aware Online {HD} Map Construction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24906--24917},
}
SciEducator: Scientific Video Understanding and Educating via Deming-Cycle Multi-Agent System: Zhiyu Xu,

Weilong Yan,

Yufei Shi,

Xin Meng,

Tao He,

Huiping Zhuang,

Ming Li,

Hehe Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zhiyu and Yan, Weilong and Shi, Yufei and Meng, Xin and He, Tao and Zhuang, Huiping and Li, Ming and Fan, Hehe},
  title     = {{SciEducator}: Scientific Video Understanding and Educating via {Deming}-Cycle Multi-Agent System},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26497--26507},
}
Artiverse: A Diverse and Physically Grounded Dataset for Articulated Objects: Denys Iliash,

Jiayi Liu,

Egor Fokin,

Qirui Wu,

Ali Mahdavi Amiri,

Manolis Savva,

Angel X. Chang; [pdf] [supp]
[bibtex]
@InProceedings{Iliash_2026_CVPR,
  author    = {Iliash, Denys and Liu, Jiayi and Fokin, Egor and Wu, Qirui and Amiri, Ali Mahdavi and Savva, Manolis and Chang, Angel X.},
  title     = {Artiverse: A Diverse and Physically Grounded Dataset for Articulated Objects},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8932--8942},
}
Guiding Diffusion Models with Semantically Degraded Conditions: Shilong Han,

Yuming Zhang,

Hongxia Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Shilong and Zhang, Yuming and Wang, Hongxia},
  title     = {Guiding Diffusion Models with Semantically Degraded Conditions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43653--43663},
}
Meta-CoT: Enhancing Granularity and Generalization in Image Editing: Shiyi Zhang,

Yiji Cheng,

Tiankai Hang,

Zijin Yin,

Runze He,

Yu Xu,

Wenxun Dai,

Yunlong Lin,

Chunyu Wang,

Qinglin Lu,

Yansong Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Shiyi and Cheng, Yiji and Hang, Tiankai and Yin, Zijin and He, Runze and Xu, Yu and Dai, Wenxun and Lin, Yunlong and Wang, Chunyu and Lu, Qinglin and Tang, Yansong},
  title     = {{Meta-CoT}: Enhancing Granularity and Generalization in Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38004--38015},
}
Z-Order Transformer for Feed-Forward Gaussian Splatting: Can Wang,

Lei Liu,

Wei Jiang,

Dong Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Can and Liu, Lei and Jiang, Wei and Xu, Dong},
  title     = {Z-Order Transformer for Feed-Forward {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7362--7371},
}
GHPT: Real-Time Relightable Gaussian Splatting using Hybrid Path Tracing: Jinyang Bo,

Fan Dou,

Wenrui Quan,

Shangxun Liu,

Yang Xu,

Yuhe Zhang,

Kang Li,

Guohua Geng; [pdf] [supp]
[bibtex]
@InProceedings{Bo_2026_CVPR,
  author    = {Bo, Jinyang and Dou, Fan and Quan, Wenrui and Liu, Shangxun and Xu, Yang and Zhang, Yuhe and Li, Kang and Geng, Guohua},
  title     = {{GHPT}: Real-Time Relightable {Gaussian} Splatting using Hybrid Path Tracing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25990--25999},
}
AutoCut: End-to-end advertisement video editing based on multimodal discretization and controllable generation: Milton Zhou,

Sizhong Qin,

Yongzhi Li,

Quan Chen,

Peng Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Milton and Qin, Sizhong and Li, Yongzhi and Chen, Quan and Jiang, Peng},
  title     = {{AutoCut}: End-to-end advertisement video editing based on multimodal discretization and controllable generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37777--37787},
}
WorldGen: From Text to Traversable and Interactive 3D Worlds: Dilin Wang,

Hyunyoung Jung,

Tom Monnier,

Kihyuk Sohn,

Chuhang Zou,

Xiaoyu Xiang,

Yu-Ying Yeh,

Di Liu,

Zixuan Huang,

Thu Nguyen-Phuoc,

Yuchen Fan,

Sergiu Oprea,

Ziyan Wang,

Roman Shapovalov,

Nikolaos Sarafianos,

Thibault Groueix,

Antoine Toisoul,

Prithviraj Dhar,

Xiao Chu,

Minghao Chen,

Geon Yeong Park,

Rakesh Ranjan,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Dilin and Jung, Hyunyoung and Monnier, Tom and Sohn, Kihyuk and Zou, Chuhang and Xiang, Xiaoyu and Yeh, Yu-Ying and Liu, Di and Huang, Zixuan and Nguyen-Phuoc, Thu and Fan, Yuchen and Oprea, Sergiu and Wang, Ziyan and Shapovalov, Roman and Sarafianos, Nikolaos and Groueix, Thibault and Toisoul, Antoine and Dhar, Prithviraj and Chu, Xiao and Chen, Minghao and Park, Geon Yeong and Ranjan, Rakesh and Vedaldi, Andrea},
  title     = {{WorldGen}: From Text to Traversable and Interactive {3D} Worlds},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27124--27135},
}
TempoControl: Temporal Attention Guidance for Text-to-Video Models: Shira Schiber,

Ofir Lindenbaum,

Idan Schwartz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schiber_2026_CVPR,
  author    = {Schiber, Shira and Lindenbaum, Ofir and Schwartz, Idan},
  title     = {{TempoControl}: Temporal Attention Guidance for Text-to-Video Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36670--36679},
}
PanoVGGT: Feed-Forward 3D Reconstruction from Panoramic Imagery: Yijing Guo,

Mengjun Chao,

Luo Wang,

Tianyang Zhao,

Haizhao Dai,

Yingliang Zhang,

Jingyi Yu,

Yujiao Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Yijing and Chao, Mengjun and Wang, Luo and Zhao, Tianyang and Dai, Haizhao and Zhang, Yingliang and Yu, Jingyi and Shi, Yujiao},
  title     = {{PanoVGGT}: Feed-Forward {3D} Reconstruction from Panoramic Imagery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36444--36453},
}
MOFA-VTON: More Fashion Possibilities with Fine-Grained Adaptations in Virtual Try-On: Xiaoyu Han,

Chenyang Wang,

Jing Wang,

Shunyuan Zheng,

Quanling Meng,

Shengping Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Xiaoyu and Wang, Chenyang and Wang, Jing and Zheng, Shunyuan and Meng, Quanling and Zhang, Shengping},
  title     = {{MOFA-VTON}: More Fashion Possibilities with Fine-Grained Adaptations in Virtual Try-On},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1895--1905},
}
NavForesee: A Unified Vision-Language World Model for Hierarchical Planning and Dual-Horizon Navigation Prediction: Fei Liu,

Shichao Xie,

Minghua Luo,

Zedong Chu,

Junjun Hu,

Xiaolong Wu,

Mu Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Fei and Xie, Shichao and Luo, Minghua and Chu, Zedong and Hu, Junjun and Wu, Xiaolong and Xu, Mu},
  title     = {{NavForesee}: A Unified Vision-Language World Model for Hierarchical Planning and Dual-Horizon Navigation Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32431--32440},
}
RealAppliance: Let High-fidelity Appliance Assets Controllable and Workable as Aligned Real Manuals: Yuzheng Gao,

Yuxing Long,

Lei Kang,

Yuchong Guo,

Ziyan Yu,

Shangqing Mao,

Jiyao Zhang,

Ruihai Wu,

Dongjiang Li,

Hui Shen,

Hao Dong; [pdf]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yuzheng and Long, Yuxing and Kang, Lei and Guo, Yuchong and Yu, Ziyan and Mao, Shangqing and Zhang, Jiyao and Wu, Ruihai and Li, Dongjiang and Shen, Hui and Dong, Hao},
  title     = {{RealAppliance}: Let High-fidelity Appliance Assets Controllable and Workable as Aligned Real Manuals},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37186--37194},
}
SJD-PAC: Accelerating Speculative Jacobi Decoding via Proactive Drafting and Adaptive Continuation: Jialiang Kang,

Han Shu,

Wenshuo Li,

Yingjie Zhai,

Xinghao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Jialiang and Shu, Han and Li, Wenshuo and Zhai, Yingjie and Chen, Xinghao},
  title     = {{SJD-PAC}: Accelerating Speculative {Jacobi} Decoding via Proactive Drafting and Adaptive Continuation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16666--16675},
}
NitroGen: An Open Foundation Model for Generalist Gaming Agents: Loïc Magne,

Anas Awadalla,

Guanzhi Wang,

Yinzhen Xu,

Joshua Belofsky,

Fengyuan Hu,

Joohwan Kim,

Ludwig Schmidt,

Georgia Gkioxari,

Jan Kautz,

Yisong Yue,

Yejin Choi,

Yuke Zhu,

Linxi Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Magne_2026_CVPR,
  author    = {Magne, Lo{\"\i}c and Awadalla, Anas and Wang, Guanzhi and Xu, Yinzhen and Belofsky, Joshua and Hu, Fengyuan and Kim, Joohwan and Schmidt, Ludwig and Gkioxari, Georgia and Kautz, Jan and Yue, Yisong and Choi, Yejin and Zhu, Yuke and Fan, Linxi},
  title     = {{NitroGen}: An Open Foundation Model for Generalist Gaming Agents},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21511--21521},
}
Few-Shot Hybrid Incremental Learning: Continually Learning under Data Scarcity and Task Uncertainty: Yan Li,

Yuzhu Shi,

Kan Zhou,

Shu Zhang,

Diqi He,

Dingwen Zhang,

Junwei Han; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yan and Shi, Yuzhu and Zhou, Kan and Zhang, Shu and He, Diqi and Zhang, Dingwen and Han, Junwei},
  title     = {Few-Shot Hybrid Incremental Learning: Continually Learning under Data Scarcity and Task Uncertainty},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32334--32344},
}
VEMamba: Efficient Isotropic Reconstruction of Volume Electron Microscopy with Axial-Lateral Consistent Mamba: Longmi Gao,

Pan Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Longmi and Gao, Pan},
  title     = {{VEMamba}: Efficient Isotropic Reconstruction of Volume Electron Microscopy with Axial-Lateral Consistent {Mamba}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15585--15594},
}
DepthFocus: Controllable Depth Estimation for See-Through Scenes: Junhong Min,

Jimin Kim,

Minwook Kim,

Cheol-Hui Min,

Youngpil Jeon,

Minyong Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Min_2026_CVPR,
  author    = {Min, Junhong and Kim, Jimin and Kim, Minwook and Min, Cheol-Hui and Jeon, Youngpil and Choi, Minyong},
  title     = {{DepthFocus}: Controllable Depth Estimation for See-Through Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12595--12605},
}
FINE: Factorizing Knowledge for Initialization of Variable-sized Diffusion Models: Yucheng Xie,

Fu Feng,

Ruixiao Shi,

Jianlu Shen,

Jing Wang,

Yong Rui,

Xin Geng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yucheng and Feng, Fu and Shi, Ruixiao and Shen, Jianlu and Wang, Jing and Rui, Yong and Geng, Xin},
  title     = {{FINE}: Factorizing Knowledge for Initialization of Variable-sized Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42018--42028},
}
OctoNav: Towards Generalist Embodied Navigation: Chen Gao,

Liankai Jin,

Xingyu Peng,

Jiazhao Zhang,

Yue Deng,

Annan Li,

He Wang,

Si Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Chen and Jin, Liankai and Peng, Xingyu and Zhang, Jiazhao and Deng, Yue and Li, Annan and Wang, He and Liu, Si},
  title     = {{OctoNav}: Towards Generalist Embodied Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40074--40084},
}
HumanNOVA: Photorealistic, Universal and Rapid 3D Human Avatar Modeling from a Single Image: Hezhen Hu,

Wangbo Zhao,

Lanqing Guo,

Hanwen Jiang,

Jonathan C. Liu,

Zhiwen Fan,

Kai Wang,

Zhangyang Wang,

Georgios Pavlakos; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Hezhen and Zhao, Wangbo and Guo, Lanqing and Jiang, Hanwen and Liu, Jonathan C. and Fan, Zhiwen and Wang, Kai and Wang, Zhangyang and Pavlakos, Georgios},
  title     = {{HumanNOVA}: Photorealistic, Universal and Rapid {3D} Human Avatar Modeling from a Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18096--18106},
}
Semantic Alignment for Pose-Invariant Identity Preserving Diffusion: Jiwon Kim,

SeonHwa Kim,

Soobin Park,

Eunju Cha,

Kyong Hwan Jin; [pdf]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jiwon and Kim, SeonHwa and Park, Soobin and Cha, Eunju and Jin, Kyong Hwan},
  title     = {Semantic Alignment for Pose-Invariant Identity Preserving Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43363--43372},
}
ULF-Loc: Unbiased Landmark Feature for Robust Visual Localization with 3D Gaussian Splatting: Yingdong Gu,

Shaocheng Yan,

Zhenjun Zhao,

Yuan Kou,

Jianxin Luo,

Pengcheng Shi,

Jiayuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Yingdong and Yan, Shaocheng and Zhao, Zhenjun and Kou, Yuan and Luo, Jianxin and Shi, Pengcheng and Li, Jiayuan},
  title     = {{ULF-Loc}: Unbiased Landmark Feature for Robust Visual Localization with {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19065--19075},
}
Reasoning Palette: Modulating Reasoning via Latent Contextualization for Controllable Exploration for (V)LMs: Rujiao Long,

Yang Li,

Xingyao Zhang,

Weixun Wang,

Tianqianjin Lin,

Xi Zhao,

Yuchi Xu,

Wenbo Su,

Junchi Yan,

Bo Zheng; [pdf] [arXiv]
[bibtex]
@InProceedings{Long_2026_CVPR,
  author    = {Long, Rujiao and Li, Yang and Zhang, Xingyao and Wang, Weixun and Lin, Tianqianjin and Zhao, Xi and Xu, Yuchi and Su, Wenbo and Yan, Junchi and Zheng, Bo},
  title     = {Reasoning Palette: Modulating Reasoning via Latent Contextualization for Controllable Exploration for {(V)LMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19463--19474},
}
Minerva-Ego: Spatiotemporal Hints for Egocentric Video Understanding: Arsha Nagrani,

Jasper Uijlings,

Shyamal Buch,

Tobias Weyand,

Sudheendra Vijayanarasimhan,

Bo Hu,

Ramin Mehran,

David A Ross,

Cordelia Schmid; [pdf] [supp]
[bibtex]
@InProceedings{Nagrani_2026_CVPR,
  author    = {Nagrani, Arsha and Uijlings, Jasper and Buch, Shyamal and Weyand, Tobias and Vijayanarasimhan, Sudheendra and Hu, Bo and Mehran, Ramin and Ross, David A. and Schmid, Cordelia},
  title     = {{Minerva-Ego}: Spatiotemporal Hints for Egocentric Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38859--38869},
}
CaReFlow: Cyclic Adaptive Rectified Flow for Multimodal Fusion: Sijie Mai,

Shiqin Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mai_2026_CVPR,
  author    = {Mai, Sijie and Han, Shiqin},
  title     = {{CaReFlow}: Cyclic Adaptive Rectified Flow for Multimodal Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37799--37809},
}
Enhancing Descriptive Captions with Visual Attributes for Multimodal Perception: Yanpeng Sun,

Jing Hao,

Ke Zhu,

Jiang-Jiang Liu,

Xiaofan Li,

Na Zhao,

Zechao Li,

Jingdong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Yanpeng and Hao, Jing and Zhu, Ke and Liu, Jiang-Jiang and Li, Xiaofan and Zhao, Na and Li, Zechao and Wang, Jingdong},
  title     = {Enhancing Descriptive Captions with Visual Attributes for Multimodal Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1683--1694},
}
RAP: Fast Feedforward Rendering-Free Attribute-Guided Primitive Importance Score Prediction for Efficient 3D Gaussian Splatting Processing: Kaifa Yang,

Qi Yang,

Yiling Xu,

Zhu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Kaifa and Yang, Qi and Xu, Yiling and Li, Zhu},
  title     = {{RAP}: Fast Feedforward Rendering-Free Attribute-Guided Primitive Importance Score Prediction for Efficient {3D} {Gaussian} Splatting Processing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33323--33332},
}
Registration-Free Learnable Multi-View Capture of Faces in Dense Semantic Correspondence: Panagiotis P. Filntisis,

George Retsinas,

Radek Danecek,

Vanessa Sklyarova,

Petros Maragos,

Timo Bolkart; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Filntisis_2026_CVPR,
  author    = {Filntisis, Panagiotis P. and Retsinas, George and Danecek, Radek and Sklyarova, Vanessa and Maragos, Petros and Bolkart, Timo},
  title     = {Registration-Free Learnable Multi-View Capture of Faces in Dense Semantic Correspondence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14512--14523},
}
NESTOR: A Nested MOE-based Neural Operator for Large-Scale PDE Pre-Training: Dengdi Sun,

Xiaoya Zhou,

Xiao Wang,

Hao Si,

Wanli Lyu,

Jin Tang,

Bin Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Dengdi and Zhou, Xiaoya and Wang, Xiao and Si, Hao and Lyu, Wanli and Tang, Jin and Luo, Bin},
  title     = {{NESTOR}: A Nested {MOE-based} Neural Operator for Large-Scale {PDE} Pre-Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6147--6156},
}
Think-as-You-See: Streaming Chain-of-Thought Reasoning for Large Vision-Language Models: Jialiang Zhang,

Junlong Tong,

Junyan Lin,

Hao Wu,

Yirong Sun,

Yunpu Ma,

Xiaoyu Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jialiang and Tong, Junlong and Lin, Junyan and Wu, Hao and Sun, Yirong and Ma, Yunpu and Shen, Xiaoyu},
  title     = {{Think-as-You-See}: Streaming Chain-of-Thought Reasoning for Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11998--12008},
}
From Detection to Association: Learning Discriminative Object Embeddings for Multi-Object Tracking: Yuqing Shao,

Yuchen Yang,

Rui Yu,

Weilong Li,

Xu Guo,

Huaicheng Yan,

Wei Wang,

Xiao Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Yuqing and Yang, Yuchen and Yu, Rui and Li, Weilong and Guo, Xu and Yan, Huaicheng and Wang, Wei and Sun, Xiao},
  title     = {From Detection to Association: Learning Discriminative Object Embeddings for Multi-Object Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6878--6888},
}
Sketch2CT: Multimodal Diffusion for Structure-Aware 3D Medical Volume Generation: Delin An,

Chaoli Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2026_CVPR,
  author    = {An, Delin and Wang, Chaoli},
  title     = {{Sketch2CT}: Multimodal Diffusion for Structure-Aware {3D} Medical Volume Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37600--37610},
}
Unlocking Pre-trained Weights: Parameter Inheritance for Zero-Shot Initialization: Jiaze Xu,

Shiyu Xia,

Jiaqi Lv,

Xin Geng; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jiaze and Xia, Shiyu and Lv, Jiaqi and Geng, Xin},
  title     = {Unlocking Pre-trained Weights: Parameter Inheritance for Zero-Shot Initialization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34502--34511},
}
Fast-FoundationStereo: Real-Time Zero-Shot Stereo Matching: Bowen Wen,

Shaurya Dewan,

Stan Birchfield; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Bowen and Dewan, Shaurya and Birchfield, Stan},
  title     = {{Fast-FoundationStereo}: Real-Time Zero-Shot Stereo Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7513--7524},
}
UniChange: Unifying Change Detection with Multimodal Large Language Model: Xu Zhang,

Danyang Li,

Xiaohang Dong,

Tianhao Wu,

Hualong Yu,

Jianye Wang,

Qicheng Li,

Xiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xu and Li, Danyang and Dong, Xiaohang and Wu, Tianhao and Yu, Hualong and Wang, Jianye and Li, Qicheng and Li, Xiang},
  title     = {{UniChange}: Unifying Change Detection with Multimodal Large Language Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42169--42179},
}
Omni-Attribute: Open-vocabulary Attribute Encoder for Visual Concept Personalization: Tsai-Shien Chen,

Aliaksandr Siarohin,

Gordon Guocheng Qian,

Kuan-Chieh Jackson Wang,

Egor Nemchinov,

Moayed Haji-Ali,

Riza Alp Guler,

Willi Menapace,

Ivan Skorokhodov,

Anil Kag,

Jun-Yan Zhu,

Sergey Tulyakov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Tsai-Shien and Siarohin, Aliaksandr and Qian, Gordon Guocheng and Wang, Kuan-Chieh Jackson and Nemchinov, Egor and Haji-Ali, Moayed and Guler, Riza Alp and Menapace, Willi and Skorokhodov, Ivan and Kag, Anil and Zhu, Jun-Yan and Tulyakov, Sergey},
  title     = {{Omni-Attribute}: Open-vocabulary Attribute Encoder for Visual Concept Personalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8194--8204},
}
LangField4D: Learning Identity-Adaptive and Spatio-Temporal Continuous 4D Language Fields for Dynamic Scenes: Yichao Xu,

Qiaowei Miao,

Jinsheng Quan,

Wei Yang,

Zhihui Li,

Yawei Luo; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yichao and Miao, Qiaowei and Quan, Jinsheng and Yang, Wei and Li, Zhihui and Luo, Yawei},
  title     = {{LangField4D}: Learning Identity-Adaptive and Spatio-Temporal Continuous {4D} Language Fields for Dynamic Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9558--9569},
}
Representing 3D Faces with Learnable B-Spline Volumes: Prashanth Chandran,

Daoye Wang,

Timo Bolkart; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chandran_2026_CVPR,
  author    = {Chandran, Prashanth and Wang, Daoye and Bolkart, Timo},
  title     = {Representing {3D} Faces with Learnable {B-Spline} Volumes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13824--13834},
}
ChartR: Evaluating Reasoning Accuracy and Robustness in Chart Question Answering: Xiaojun Chen,

Sixiao Luo,

Ziqi Liu,

Min Yang,

Qin Zhang,

Liang-Jie Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xiaojun and Luo, Sixiao and Liu, Ziqi and Yang, Min and Zhang, Qin and Zhang, Liang-Jie},
  title     = {{ChartR}: Evaluating Reasoning Accuracy and Robustness in Chart Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41193--41202},
}
Elucidating the Design Space of Arbitrary-Noise-Based Diffusion Models: Xingyu Qiu,

Mengying Yang,

Xinghua Ma,

Dong Liang,

Fanding Li,

Gongning Luo,

Wei Wang,

Kuanquan Wang,

Shuo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Xingyu and Yang, Mengying and Ma, Xinghua and Liang, Dong and Li, Fanding and Luo, Gongning and Wang, Wei and Wang, Kuanquan and Li, Shuo},
  title     = {Elucidating the Design Space of Arbitrary-Noise-Based Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30833--30842},
}
VIVA: VLM-Guided Instruction-Based Video Editing with Reward Optimization: Xiaoyan Cong,

Haotian Yang,

Angtian Wang,

Yizhi Wang,

Yiding Yang,

Canyu Zhang,

Chongyang Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cong_2026_CVPR,
  author    = {Cong, Xiaoyan and Yang, Haotian and Wang, Angtian and Wang, Yizhi and Yang, Yiding and Zhang, Canyu and Ma, Chongyang},
  title     = {{VIVA}: {VLM-Guided} Instruction-Based Video Editing with Reward Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34364--34374},
}
Adaptive 3D Perception for Small Aerial Targets Under Sparse Sampling via Reinforcement Learning: Shenghai Yuan,

Wei Yihan,

Jason Yee,

Zhuoran Qiao,

Boyang Lou,

Enwen Hu; [pdf]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Shenghai and Yihan, Wei and Yee, Jason and Qiao, Zhuoran and Lou, Boyang and Hu, Enwen},
  title     = {Adaptive {3D} Perception for Small Aerial Targets Under Sparse Sampling via Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39064--39074},
}
FluxMem: Adaptive Hierarchical Memory for Streaming Video Understanding: Yiweng Xie,

Bo He,

Junke Wang,

Xiangyu Zheng,

Ziyi Ye,

Zuxuan Wu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yiweng and He, Bo and Wang, Junke and Zheng, Xiangyu and Ye, Ziyi and Wu, Zuxuan},
  title     = {{FluxMem}: Adaptive Hierarchical Memory for Streaming Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31272--31282},
}
Adapting In-context Generation for Enhanced Composed Image Retrieval: Haiwen Li,

Zining Chen,

Delong Liu,

Zhaohui Hou,

Zhicheng Zhao,

Fei Su; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Haiwen and Chen, Zining and Liu, Delong and Hou, Zhaohui and Zhao, Zhicheng and Su, Fei},
  title     = {Adapting In-context Generation for Enhanced Composed Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29167--29177},
}
Focus on Background: Exploring SAM's Potential in Few-shot Medical Image Segmentation with Background-centric Prompting: Yuntian Bo,

Yazhou Zhu,

Piotr Koniusz,

Haofeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Bo_2026_CVPR,
  author    = {Bo, Yuntian and Zhu, Yazhou and Koniusz, Piotr and Zhang, Haofeng},
  title     = {Focus on Background: Exploring {SAM's} Potential in Few-shot Medical Image Segmentation with Background-centric Prompting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30032--30041},
}
Convolutional Neural Networks Driven by Content Similarity: Ligeng Zou,

Guihu Zhao; [pdf]
[bibtex]
@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Ligeng and Zhao, Guihu},
  title     = {Convolutional Neural Networks Driven by Content Similarity},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27449--27459},
}
PEARL: Geometry Aligns Semantics for Training-Free Open-Vocabulary Semantic Segmentation: Gensheng Pei,

Xiruo Jiang,

Xinhao Cai,

Tao Chen,

Yazhou Yao,

Byeungwoo Jeon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pei_2026_CVPR,
  author    = {Pei, Gensheng and Jiang, Xiruo and Cai, Xinhao and Chen, Tao and Yao, Yazhou and Jeon, Byeungwoo},
  title     = {{PEARL}: Geometry Aligns Semantics for Training-Free Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17927--17937},
}
CoD: A Diffusion Foundation Model for Image Compression: Zhaoyang Jia,

Zihan Zheng,

Naifu Xue,

Jiahao Li,

Bin Li,

Zongyu Guo,

Xiaoyi Zhang,

Houqiang Li,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Zhaoyang and Zheng, Zihan and Xue, Naifu and Li, Jiahao and Li, Bin and Guo, Zongyu and Zhang, Xiaoyi and Li, Houqiang and Lu, Yan},
  title     = {{CoD}: A Diffusion Foundation Model for Image Compression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38420--38429},
}
ForceVLA2: Unleashing Hybrid Force-Position Control with Force Awareness for Contact-Rich Manipulation: Yang Li,

Zhaxizhuoma Zhaxizhuoma,

Hongru Jiang,

Junjie Xia,

Hongquan Zhang,

Jinda Du,

Yunsong Zhou,

Jia Zeng,

Ce Hao,

Jieji Ren,

Qiaojun Yu,

Cewu Lu,

Yu Qiao,

Jiangmiao Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yang and Zhaxizhuoma, Zhaxizhuoma and Jiang, Hongru and Xia, Junjie and Zhang, Hongquan and Du, Jinda and Zhou, Yunsong and Zeng, Jia and Hao, Ce and Ren, Jieji and Yu, Qiaojun and Lu, Cewu and Qiao, Yu and Pang, Jiangmiao},
  title     = {{ForceVLA2}: Unleashing Hybrid Force-Position Control with Force Awareness for Contact-Rich Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8911--8920},
}
TGSFormer: Scalable Temporal Gaussian Splatting for Embodied Semantic Scene Completion: Rui Qian,

Haozhi Cao,

Tianchen Deng,

Tianxin Hu,

Weixiang Guo,

Shenghai Yuan,

Lihua Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Rui and Cao, Haozhi and Deng, Tianchen and Hu, Tianxin and Guo, Weixiang and Yuan, Shenghai and Xie, Lihua},
  title     = {{TGSFormer}: Scalable Temporal {Gaussian} Splatting for Embodied Semantic Scene Completion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11881--11890},
}
CRFT: Consistent-Recurrent Feature Flow Transformer for Cross-Modal Image Registration: Xuecong Liu,

Mengzhu Ding,

Zixuan Sun,

Zhang Li,

Xichao Teng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xuecong and Ding, Mengzhu and Sun, Zixuan and Li, Zhang and Teng, Xichao},
  title     = {{CRFT}: Consistent-Recurrent Feature Flow Transformer for Cross-Modal Image Registration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34784--34794},
}
VISTA: A Test-Time Self-Improving Video Generation Agent: Do Xuan Long,

Xingchen Wan,

Hootan Nakhost,

Chen-Yu Lee,

Tomas Pfister,

Sercan Ö. Arik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Long_2026_CVPR,
  author    = {Long, Do Xuan and Wan, Xingchen and Nakhost, Hootan and Lee, Chen-Yu and Pfister, Tomas and Arik, Sercan {\"O}.},
  title     = {{VISTA}: A Test-Time Self-Improving Video Generation Agent},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6021--6032},
}
Dynamic-Static Decomposition for Novel View Synthesis of Dynamic Scenes with Spiking Neurons: Lingyun Dai,

Zehao Chen,

Yan Liu,

Shi Gu,

Peng Lin,

De Ma,

Huajin Tang,

Qian Zheng,

Gang Pan; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Lingyun and Chen, Zehao and Liu, Yan and Gu, Shi and Lin, Peng and Ma, De and Tang, Huajin and Zheng, Qian and Pan, Gang},
  title     = {Dynamic-Static Decomposition for Novel View Synthesis of Dynamic Scenes with Spiking Neurons},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8342--8352},
}
Hierarchical Visual Relocalization with Nearest View Synthesis from Feature Gaussian Splatting: Huaqi Tao,

Bingxi Liu,

Guangcheng Chen,

Fulin Tang,

Li He,

Hong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2026_CVPR,
  author    = {Tao, Huaqi and Liu, Bingxi and Chen, Guangcheng and Tang, Fulin and He, Li and Zhang, Hong},
  title     = {Hierarchical Visual Relocalization with Nearest View Synthesis from Feature {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40981--40991},
}
Unleashing Vision-Language Semantics for Deepfake Video Detection: Jiawen Zhu,

Yunqi Miao,

Xueyi Zhang,

Jiankang Deng,

Guansong Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Jiawen and Miao, Yunqi and Zhang, Xueyi and Deng, Jiankang and Pang, Guansong},
  title     = {Unleashing Vision-Language Semantics for Deepfake Video Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42953--42963},
}
Protect to Adapt: Orthogonal Subspace Control with Ranked Negative-Prompt Curriculum for Few-Shot Action Recognition: Hantao Qi,

Yan Yan,

Junlong Gao,

Hanzi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Hantao and Yan, Yan and Gao, Junlong and Wang, Hanzi},
  title     = {Protect to Adapt: Orthogonal Subspace Control with Ranked Negative-Prompt Curriculum for Few-Shot Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20160--20169},
}
MaskFocus: Focusing Policy Optimization on Critical Steps for Masked Image Generation: Guohui Zhang,

Hu Yu,

Xiaoxiao Ma,

Yaning Pan,

Hang Xu,

Jie Huang,

Feng Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Guohui and Yu, Hu and Ma, Xiaoxiao and Pan, Yaning and Xu, Hang and Huang, Jie and Zhao, Feng},
  title     = {{MaskFocus}: Focusing Policy Optimization on Critical Steps for Masked Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5956--5966},
}
Differentially Private 2D Human Pose Estimation: Kaushik Bhargav Sivangi,

Paul Henderson,

Fani Deligianni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sivangi_2026_CVPR,
  author    = {Sivangi, Kaushik Bhargav and Henderson, Paul and Deligianni, Fani},
  title     = {Differentially Private {2D} Human Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21143--21153},
}
MetricHMSR: Metric Human Mesh and Scene Recovery from Monocular Images: Chentao Song,

He Zhang,

Haolei Yuan,

Haozhe Lin,

Jianhua Tao,

Hongwen Zhang,

Tao Yu; [pdf] [supp]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Chentao and Zhang, He and Yuan, Haolei and Lin, Haozhe and Tao, Jianhua and Zhang, Hongwen and Yu, Tao},
  title     = {{MetricHMSR}: Metric Human Mesh and Scene Recovery from Monocular Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {21132--21142},
}
Uni-Hema: Unified Model for Digital Hematopathology: Abdul Rehman,

Iqra Rasool,

Ayisha Imran,

Mohsen Ali,

Waqas Sultani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rehman_2026_CVPR,
  author    = {Rehman, Abdul and Rasool, Iqra and Imran, Ayisha and Ali, Mohsen and Sultani, Waqas},
  title     = {{Uni-Hema}: Unified Model for Digital Hematopathology},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37578--37589},
}
ChartNet: A Million-Scale, High-Quality Multimodal Dataset for Robust Chart Understanding: Jovana Kondic,

Pengyuan Li,

Dhiraj Joshi,

Isaac Sanchez,

Ben Wiesel,

Shafiq Abedin,

Amit Alfassy,

Eli Schwartz,

Daniel Caraballo,

Yagmur Gizem Cinar,

Florian Scheidegger,

Steven I. Ross,

Daniel Karl I. Weidele,

Hang Hua,

Ekaterina Arutyunova,

Roei Herzig,

Zihan Wang,

Xinyue Yu,

Yunfei Zhao,

Sicong Jiang,

Minghao Liu,

Qunshu Lin,

Aude Oliva,

Rogerio Feris; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kondic_2026_CVPR,
  author    = {Kondic, Jovana and Li, Pengyuan and Joshi, Dhiraj and Sanchez, Isaac and Wiesel, Ben and Abedin, Shafiq and Alfassy, Amit and Schwartz, Eli and Caraballo, Daniel and Cinar, Yagmur Gizem and Scheidegger, Florian and Ross, Steven I. and Weidele, Daniel Karl I. and Hua, Hang and Arutyunova, Ekaterina and Herzig, Roei and Wang, Zihan and Yu, Xinyue and Zhao, Yunfei and Jiang, Sicong and Liu, Minghao and Lin, Qunshu and Oliva, Aude and Feris, Rogerio},
  title     = {{ChartNet}: A Million-Scale, High-Quality Multimodal Dataset for Robust Chart Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15922--15932},
}
MapRoute: Precise-Concept Erasing Mappers via Semantic Routing: Sihao Li,

Baixi Liang,

Shuohong Xia,

Yunyun Yang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Sihao and Liang, Baixi and Xia, Shuohong and Yang, Yunyun},
  title     = {{MapRoute}: Precise-Concept Erasing Mappers via Semantic Routing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10187--10196},
}
GraspGen-X: Cross-Embodiment 6-DOF Diffusion-based Grasping: Beining Han,

Yu-Wei Chao,

Erwin Coumans,

Clemens Eppner,

Jia Deng,

Stan Birchfield,

Adithyavairavan Murali; [pdf] [supp]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Beining and Chao, Yu-Wei and Coumans, Erwin and Eppner, Clemens and Deng, Jia and Birchfield, Stan and Murali, Adithyavairavan},
  title     = {{GraspGen-X}: Cross-Embodiment {6-DOF} Diffusion-based Grasping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20878--20889},
}
Not All Birds Look The Same: Identity-Preserving Generation For Birds: Aaron Sun,

Oindrila Saha,

Subhransu Maji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Aaron and Saha, Oindrila and Maji, Subhransu},
  title     = {Not All Birds Look The Same: Identity-Preserving Generation For Birds},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1983--1993},
}
Geometric-Aware Hypergraph Reasoning for Novel Class Discovery in Point Cloud Segmentation: Zihao Zhang,

Aming Wu,

Yang Li,

Yahong Han,

Jialie Shen; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zihao and Wu, Aming and Li, Yang and Han, Yahong and Shen, Jialie},
  title     = {Geometric-Aware Hypergraph Reasoning for Novel Class Discovery in Point Cloud Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10006--10015},
}
Discriminative Perception via Anchored Description for Reasoning Segmentation: Tao Yang,

Qing Zhou,

Yanliang Li,

Qi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Tao and Zhou, Qing and Li, Yanliang and Wang, Qi},
  title     = {Discriminative Perception via Anchored Description for Reasoning Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13189--13198},
}
Dual Band Thermal Videography: Separating Time-Varying Reflection and Emission Near Ambient Conditions: Sriram Narayanan,

Mani Ramanagopal,

Srinivasa Narasimhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Narayanan_2026_CVPR,
  author    = {Narayanan, Sriram and Ramanagopal, Mani and Narasimhan, Srinivasa},
  title     = {Dual Band Thermal Videography: Separating Time-Varying Reflection and Emission Near Ambient Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {199--208},
}
Breaking the Continuum: Discrete Distribution Learning for Structural MRI Reconstruction: Tianle Lyu,

Mengjingcheng Mo,

Ting Wen,

Zhen Song,

Zinan Xiong,

Yanjie Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Lyu_2026_CVPR,
  author    = {Lyu, Tianle and Mo, Mengjingcheng and Wen, Ting and Song, Zhen and Xiong, Zinan and Zhu, Yanjie},
  title     = {Breaking the Continuum: Discrete Distribution Learning for Structural {MRI} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37568--37577},
}
SpaceDrive: Infusing Spatial Awareness into VLM-based Autonomous Driving: Peizheng Li,

Zhenghao Zhang,

David Holtz,

Hang Yu,

Yutong Yang,

Yuzhi Lai,

Rui Song,

Andreas Geiger,

Andreas Zell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Peizheng and Zhang, Zhenghao and Holtz, David and Yu, Hang and Yang, Yutong and Lai, Yuzhi and Song, Rui and Geiger, Andreas and Zell, Andreas},
  title     = {{SpaceDrive}: Infusing Spatial Awareness into {VLM}-based Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40096--40107},
}
H2-Surv: Hierarchical Hyperbolic Multimodal Representation Learning for Survival Prediction: Jiaqi Yang,

Wenting Chen,

Xiangjian He,

Yuanbai Li,

Sen Yang,

Linlin Shen,

Xiaohan Xing; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Jiaqi and Chen, Wenting and He, Xiangjian and Li, Yuanbai and Yang, Sen and Shen, Linlin and Xing, Xiaohan},
  title     = {{H2-Surv}: Hierarchical Hyperbolic Multimodal Representation Learning for Survival Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28318--28327},
}
CodeMMR: Bridging Natural Language, Code, and Image for Unified Retrieval: Jiahui Geng,

Qing Li,

Fengyu Cai,

Fakhri Karray; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Geng_2026_CVPR,
  author    = {Geng, Jiahui and Li, Qing and Cai, Fengyu and Karray, Fakhri},
  title     = {{CodeMMR}: Bridging Natural Language, Code, and Image for Unified Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38742--38752},
}
RemedyGS: Defend 3D Gaussian Splatting Against Computation Cost Attacks: Yanping Li,

Zhening Liu,

Zijian Li,

Zehong Lin,

Jun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yanping and Liu, Zhening and Li, Zijian and Lin, Zehong and Zhang, Jun},
  title     = {{RemedyGS}: Defend {3D} {Gaussian} Splatting Against Computation Cost Attacks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33227--33236},
}
Beyond Explicit Language: Plug-and-Play Visual-to-Linguistic Modeling Toward General Object Tracking: Kaiyang Lan,

Ying Cui,

Chenchen Jing,

Jianwei Zheng,

Dongyan Guo; [pdf] [supp]
[bibtex]
@InProceedings{Lan_2026_CVPR,
  author    = {Lan, Kaiyang and Cui, Ying and Jing, Chenchen and Zheng, Jianwei and Guo, Dongyan},
  title     = {Beyond Explicit Language: Plug-and-Play Visual-to-Linguistic Modeling Toward General Object Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42604--42614},
}
BiProLoRA: Bilevel Prompt LoRA for Real Scene Recovery: Nan An,

Long Ma,

Tengyu Ma,

Zhu Liu,

Yingchi Liu,

Risheng Liu; [pdf]
[bibtex]
@InProceedings{An_2026_CVPR,
  author    = {An, Nan and Ma, Long and Ma, Tengyu and Liu, Zhu and Liu, Yingchi and Liu, Risheng},
  title     = {{BiProLoRA}: Bilevel Prompt {LoRA} for Real Scene Recovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15465--15475},
}
Benchmarking Endoscopic Surgical Image Restoration and Beyond: Jialun Pei,

Diandian Guo,

Donghui Yang,

Zhixi Li,

Yuxin Feng,

Long Ma,

Bo Du,

Pheng-Ann Heng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pei_2026_CVPR,
  author    = {Pei, Jialun and Guo, Diandian and Yang, Donghui and Li, Zhixi and Feng, Yuxin and Ma, Long and Du, Bo and Heng, Pheng-Ann},
  title     = {Benchmarking Endoscopic Surgical Image Restoration and Beyond},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37413--37422},
}
DRAMA: Next-Gen Dynamic Orchestration for Resilient Multi-Agent Ecosystems in Flux: Xinkui Zhao,

Yifan Zhang,

Sai Liu,

Naibo Wang,

Guanjie Cheng,

Yueshen Xu,

Chang Liu,

Shuiguang Deng,

Jianwei Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Xinkui and Zhang, Yifan and Liu, Sai and Wang, Naibo and Cheng, Guanjie and Xu, Yueshen and Liu, Chang and Deng, Shuiguang and Yin, Jianwei},
  title     = {{DRAMA}: Next-Gen Dynamic Orchestration for Resilient Multi-Agent Ecosystems in Flux},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1020--1030},
}
EagleVision: A Dual-Stage Framework with BEV-grounding-based Chain-of-Thought for Spatial Intelligence: Jiaxu Wan,

Xu Wang,

Mengwei Xie,

Hang Zhang,

Mu Xu,

Yang Han,

Ding Yuan,

Hong Zhang,

Yifan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wan_2026_CVPR,
  author    = {Wan, Jiaxu and Wang, Xu and Xie, Mengwei and Zhang, Hang and Xu, Mu and Han, Yang and Yuan, Ding and Zhang, Hong and Yang, Yifan},
  title     = {{EagleVision}: A Dual-Stage Framework with {BEV}-grounding-based Chain-of-Thought for Spatial Intelligence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38637--38646},
}
OneStory: Coherent Multi-Shot Video Generation with Adaptive Memory: Zhaochong An,

Menglin Jia,

Haonan Qiu,

Zijian Zhou,

Xiaoke Huang,

Zhiheng Liu,

Weiming Ren,

Kumara Kahatapitiya,

Ding Liu,

Sen He,

Chenyang Zhang,

Tao Xiang,

Fanny Yang,

Serge Belongie,

Tian Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2026_CVPR,
  author    = {An, Zhaochong and Jia, Menglin and Qiu, Haonan and Zhou, Zijian and Huang, Xiaoke and Liu, Zhiheng and Ren, Weiming and Kahatapitiya, Kumara and Liu, Ding and He, Sen and Zhang, Chenyang and Xiang, Tao and Yang, Fanny and Belongie, Serge and Xie, Tian},
  title     = {{OneStory}: Coherent Multi-Shot Video Generation with Adaptive Memory},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16173--16184},
}
Tavatar: Topology-Aware Gaussian Attribute Derivation for Animatable Human Avatars: Hailin Luo,

Yifan Yang,

Jiazhi Shu,

Zixiong Huang,

Qi Chen,

Qing Du,

Mingkui Tan; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Hailin and Yang, Yifan and Shu, Jiazhi and Huang, Zixiong and Chen, Qi and Du, Qing and Tan, Mingkui},
  title     = {Tavatar: Topology-Aware {Gaussian} Attribute Derivation for Animatable Human Avatars},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4087--4096},
}
From Weights to Concepts: Data-Free Interpretability of CLIP via Singular Vector Decomposition: Francesco Gentile,

Nicola Dall'Asen,

Francesco Tonini,

Massimiliano Mancini,

Lorenzo Vaquero,

Elisa Ricci; [pdf] [supp]
[bibtex]
@InProceedings{Gentile_2026_CVPR,
  author    = {Gentile, Francesco and Dall'Asen, Nicola and Tonini, Francesco and Mancini, Massimiliano and Vaquero, Lorenzo and Ricci, Elisa},
  title     = {From Weights to Concepts: Data-Free Interpretability of {CLIP} via Singular Vector Decomposition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2895--2906},
}
OMoBlur: An Object Motion Blur Dataset and Benchmark for Real-World Local Motion Deblurring: Dingchuan Yu,

Jiatong Li,

Jingwen Zhou,

Zhengyue Zhuge,

Yueting Chen,

Qi Li; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Dingchuan and Li, Jiatong and Zhou, Jingwen and Zhuge, Zhengyue and Chen, Yueting and Li, Qi},
  title     = {{OMoBlur}: An Object Motion Blur Dataset and Benchmark for Real-World Local Motion Deblurring},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22626--22635},
}
StableMTL: Repurposing Latent Diffusion Models for Multi-Task Learning from Partially Annotated Synthetic Datasets: Anh-Quan Cao,

Ivan Lopes,

Raoul de Charette; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Anh-Quan and Lopes, Ivan and de Charette, Raoul},
  title     = {{StableMTL}: Repurposing Latent Diffusion Models for Multi-Task Learning from Partially Annotated Synthetic Datasets},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37788--37798},
}
Locate-then-Sparsify: Attribution Guided Sparse Strategy for Visual Hallucination Mitigation: Tiantian Dang,

Chao Bi,

Shufan Shen,

Jinzhe Liu,

Qingming Huang,

Shuhui Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dang_2026_CVPR,
  author    = {Dang, Tiantian and Bi, Chao and Shen, Shufan and Liu, Jinzhe and Huang, Qingming and Wang, Shuhui},
  title     = {Locate-then-Sparsify: Attribution Guided Sparse Strategy for Visual Hallucination Mitigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18251--18260},
}
Beyond Rule-Based Agents: Active Markov Games for Realistic Multi-Agent Interaction in Autonomous Driving: Yuan Gui,

Hongchen Luo,

Jiao Wang,

Liqi Qu; [pdf] [supp]
[bibtex]
@InProceedings{Gui_2026_CVPR,
  author    = {Gui, Yuan and Luo, Hongchen and Wang, Jiao and Qu, Liqi},
  title     = {Beyond Rule-Based Agents: Active {Markov} Games for Realistic Multi-Agent Interaction in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10689--10698},
}
Spectral Conformal Risk Control: Distribution-Free Tail Guarantees via Bayesian Quadrature: Mohammad Mahdi Kazemi Esfeh,

Qi Yan,

Yongxing Zhang,

Zahra Gholami,

Renjie Liao,

Purang Abolmaesumi; [pdf] [supp]
[bibtex]
@InProceedings{Esfeh_2026_CVPR,
  author    = {Esfeh, Mohammad Mahdi Kazemi and Yan, Qi and Zhang, Yongxing and Gholami, Zahra and Liao, Renjie and Abolmaesumi, Purang},
  title     = {Spectral Conformal Risk Control: Distribution-Free Tail Guarantees via {Bayesian} Quadrature},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12977--12986},
}
VideoChat-M1: Collaborative Policy Planning for Video Understanding via Multi-Agent Reinforcement Learning: Boyu Chen,

Zikang Wang,

Zhengrong Yue,

Kainan Yan,

Chenyun Yu,

Yi Huang,

Zijun Liu,

Yafei Wen,

Xiaoxin Chen,

Yang Liu,

Peng Li,

Yali Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Boyu and Wang, Zikang and Yue, Zhengrong and Yan, Kainan and Yu, Chenyun and Huang, Yi and Liu, Zijun and Wen, Yafei and Chen, Xiaoxin and Liu, Yang and Li, Peng and Wang, Yali},
  title     = {{VideoChat-M1}: Collaborative Policy Planning for Video Understanding via Multi-Agent Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33772--33783},
}
Cycle-Consistent Tuning for Layered Image Decomposition: Zheng Gu,

Min Lu,

Zhida Sun,

Dani Lischinski,

Daniel Cohen-Or,

Hui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Zheng and Lu, Min and Sun, Zhida and Lischinski, Dani and Cohen-Or, Daniel and Huang, Hui},
  title     = {Cycle-Consistent Tuning for Layered Image Decomposition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22478--22487},
}
SketchVL: Policy Optimization via Fine-Grained Credit Assignment for Chart Understanding and More: Muye Huang,

Lingling Zhang,

Yifei Li,

Yaqiang Wu,

Jun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Muye and Zhang, Lingling and Li, Yifei and Wu, Yaqiang and Liu, Jun},
  title     = {{SketchVL}: Policy Optimization via Fine-Grained Credit Assignment for Chart Understanding and More},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4738--4748},
}
ArchSym: Detecting 3D-Grounded Architectural Symmetries in the Wild: Hanyu Chen,

Ruojin Cai,

Steve Marschner,

Noah Snavely; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Hanyu and Cai, Ruojin and Marschner, Steve and Snavely, Noah},
  title     = {{ArchSym}: Detecting {3D}-Grounded Architectural Symmetries in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36561--36570},
}
Structural Graph Probing of Vision-Language Models: Haoyu He,

Yue Zhuo,

Yu Zheng,

Qi R. Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Haoyu and Zhuo, Yue and Zheng, Yu and Wang, Qi R.},
  title     = {Structural Graph Probing of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24084--24094},
}
P-Flow: Prompting Visual Effects Generation: Rui Zhao,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Rui and Shou, Mike Zheng},
  title     = {{P-Flow}: Prompting Visual Effects Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9149--9160},
}
Teaching DINOv3 About Partial 3D Geometry: A Self-Supervised Geometry-Aware Approach: Viktoria Ehm,

Dongliang Cao,

Riccardo Marin,

Daniel Scholz,

Weikang Wang,

Florian Bernard,

Daniel Cremers; [pdf] [supp]
[bibtex]
@InProceedings{Ehm_2026_CVPR,
  author    = {Ehm, Viktoria and Cao, Dongliang and Marin, Riccardo and Scholz, Daniel and Wang, Weikang and Bernard, Florian and Cremers, Daniel},
  title     = {Teaching {DINOv3} About Partial {3D} Geometry: A Self-Supervised Geometry-Aware Approach},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42071--42081},
}
One Token, Two Fates: A Unified Framework via Vision Token Manipulation Against MLLMs Hallucination: Zhan Fa,

Yue Duan,

Jian Zhang,

Lei Qi,

Yinghuan Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fa_2026_CVPR,
  author    = {Fa, Zhan and Duan, Yue and Zhang, Jian and Qi, Lei and Shi, Yinghuan},
  title     = {One Token, Two Fates: A Unified Framework via Vision Token Manipulation Against {MLLMs} Hallucination},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11106--11115},
}
SpatialDiff: 3D-Aware Object Movement via Implicit Spatial Modeling: Zheng Liu,

Zijian He,

Huiguo He,

Weizhi Zhong,

Yejun Tang,

Huan Yang,

Kun Gai,

Guanbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zheng and He, Zijian and He, Huiguo and Zhong, Weizhi and Tang, Yejun and Yang, Huan and Gai, Kun and Li, Guanbin},
  title     = {{SpatialDiff}: {3D}-Aware Object Movement via Implicit Spatial Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18396--18406},
}
Voxify3D: Pixel Art Meets Volumetric Rendering: Yi-Chuan Huang,

Jiewen Chan,

Hao-Jen Chien,

Yu-Lun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Yi-Chuan and Chan, Jiewen and Chien, Hao-Jen and Liu, Yu-Lun},
  title     = {{Voxify3D}: Pixel Art Meets Volumetric Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15398--15410},
}
Multi-modal Frequency Decomposition Network for Semantic Scene Completion: Die Zuo,

Lubo Wang,

Ruonan Liu,

Qing Guo,

Chong Wang,

Dongdong Wu,

Wei Feng,

Kairui Yang,

Di Lin; [pdf] [supp]
[bibtex]
@InProceedings{Zuo_2026_CVPR,
  author    = {Zuo, Die and Wang, Lubo and Liu, Ruonan and Guo, Qing and Wang, Chong and Wu, Dongdong and Feng, Wei and Yang, Kairui and Lin, Di},
  title     = {Multi-modal Frequency Decomposition Network for Semantic Scene Completion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41531--41540},
}
Beyond Missing Modalities: Hypergraph Conditioned Diffusion for Uncertainty-Aware Multimodal Emotion Recognition: Xihang Qiu,

Yuhao Fang,

Qing Zhou,

Bin Zhai,

Jialong Hong,

Wanpeng Zhang,

Yao Lu,

Ye Zhang,

Chun Li; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Xihang and Fang, Yuhao and Zhou, Qing and Zhai, Bin and Hong, Jialong and Zhang, Wanpeng and Lu, Yao and Zhang, Ye and Li, Chun},
  title     = {Beyond Missing Modalities: Hypergraph Conditioned Diffusion for Uncertainty-Aware Multimodal Emotion Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22953--22963},
}
GRPO-Guard: Mitigating Implicit Over-Optimization in Flow Matching via Regulated Clipping: Jing Wang,

Jiajun Liang,

Jie Liu,

Henglin Liu,

Gongye Liu,

Jun Zheng,

Wanyuan Pang,

Ao Ma,

Zhenyu Xie,

Xintao Wang,

Meng Wang,

Pengfei Wan,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jing and Liang, Jiajun and Liu, Jie and Liu, Henglin and Liu, Gongye and Zheng, Jun and Pang, Wanyuan and Ma, Ao and Xie, Zhenyu and Wang, Xintao and Wang, Meng and Wan, Pengfei and Liang, Xiaodan},
  title     = {{GRPO-Guard}: Mitigating Implicit Over-Optimization in Flow Matching via Regulated Clipping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5988--5998},
}
Target-Aware Invertible Encoder with Reconstruction Guidance for Infrared Small Target Detection: Shule Yan,

Zetian Zhang,

Xiao Ma,

Zexuan Ji; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Shule and Zhang, Zetian and Ma, Xiao and Ji, Zexuan},
  title     = {Target-Aware Invertible Encoder with Reconstruction Guidance for Infrared Small Target Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32714--32723},
}
Cross-View Distillation and Adaptive Masking for Incomplete Multi-View Multi-Label Classification: Yadong Liu,

Qiaoqi Li,

Yueying Wang,

Lunke Fei,

Jie Wen; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yadong and Li, Qiaoqi and Wang, Yueying and Fei, Lunke and Wen, Jie},
  title     = {Cross-View Distillation and Adaptive Masking for Incomplete Multi-View Multi-Label Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23051--23060},
}
Efficient Weighted Sampling via Score-based Generative Models: Heasung Kim,

Taekyun Lee,

Hyeji Kim,

Gustavo De Veciana; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Heasung and Lee, Taekyun and Kim, Hyeji and De Veciana, Gustavo},
  title     = {Efficient Weighted Sampling via Score-based Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1155--1166},
}
SD-FSMIS: Adapting Stable Diffusion for Few-Shot Medical Image Segmentation: Meihua Li,

Yang Zhang,

Weizhao He,

Hu Qu,

Yisong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Meihua and Zhang, Yang and He, Weizhao and Qu, Hu and Li, Yisong},
  title     = {{SD-FSMIS}: Adapting {Stable Diffusion} for Few-Shot Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29979--29989},
}
FastLightGen: Fast and Light Video Generation with Fewer Steps and Parameters: Shitong Shao,

Yufei Gu,

Zeke Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Shitong and Gu, Yufei and Xie, Zeke},
  title     = {{FastLightGen}: Fast and Light Video Generation with Fewer Steps and Parameters},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2104--2114},
}
The Invisible Gorilla Effect in Out-of-distribution Detection: Harry Anthony,

Ziyun Liang,

Hermione Warr,

Konstantinos Kamnitsas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Anthony_2026_CVPR,
  author    = {Anthony, Harry and Liang, Ziyun and Warr, Hermione and Kamnitsas, Konstantinos},
  title     = {The Invisible Gorilla Effect in Out-of-distribution Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39314--39325},
}
Learning by Analogy: A Causal Framework for Compositional Generalization: Lingjing Kong,

Shaoan Xie,

Yang Jiao,

Yetian Chen,

Yanhui Guo,

Simone Shao,

Yan Gao,

Guangyi Chen,

Kun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Lingjing and Xie, Shaoan and Jiao, Yang and Chen, Yetian and Guo, Yanhui and Shao, Simone and Gao, Yan and Chen, Guangyi and Zhang, Kun},
  title     = {Learning by Analogy: A Causal Framework for Compositional Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36615--36626},
}
Prefill-Time Intervention for Mitigating Hallucination in Large Vision-Language Models: Chengsheng Zhang,

Chenghao Sun,

Xinyan Jiang,

Wei Li,

Xinmei Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chengsheng and Sun, Chenghao and Jiang, Xinyan and Li, Wei and Tian, Xinmei},
  title     = {Prefill-Time Intervention for Mitigating Hallucination in Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25293--25303},
}
UniPart: Part-Level 3D Generation with Unified 3D Geom-Seg Latents: Xufan He,

Yushuang Wu,

Xiaoyang Guo,

Chongjie Ye,

Jiaqing Zhou,

Tianlei Hu,

Xiaoguang Han,

Dong Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Xufan and Wu, Yushuang and Guo, Xiaoyang and Ye, Chongjie and Zhou, Jiaqing and Hu, Tianlei and Han, Xiaoguang and Du, Dong},
  title     = {{UniPart}: Part-Level {3D} Generation with Unified {3D} Geom-Seg Latents},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34227--34236},
}
Rotation Invariant and Symmetry Aware Pixel Difference Network for Remote Sensing Object Detection: Jialei Zhan,

Li Liu,

Jiehua Zhang,

Yuhang Xie,

Yongxiang Liu,

Jiangming Chen,

Ming-Ming Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Zhan_2026_CVPR,
  author    = {Zhan, Jialei and Liu, Li and Zhang, Jiehua and Xie, Yuhang and Liu, Yongxiang and Chen, Jiangming and Cheng, Ming-Ming},
  title     = {Rotation Invariant and Symmetry Aware Pixel Difference Network for Remote Sensing Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13264--13274},
}
Gamba: Mamba-based graph convolutional network with dynamic graph topology learning for action recognition: Rouyi Zhou,

Yangzhi Wu,

Jiajun Wen,

Can Gao,

Feng Liu,

Zhihui Lai,

Linlin Shen; [pdf]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Rouyi and Wu, Yangzhi and Wen, Jiajun and Gao, Can and Liu, Feng and Lai, Zhihui and Shen, Linlin},
  title     = {Gamba: {Mamba}-based graph convolutional network with dynamic graph topology learning for action recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6065--6074},
}
Language-Guided One-Step Diffusion Model for Nighttime Flare Removal: Aoxiang Ning,

Kailong Yu,

Minglong Xue,

Liyuan Pan,

Jinhong He,

Wenchao Yan,

Mingliang Zhou,

Yirui Wu; [pdf] [supp]
[bibtex]
@InProceedings{Ning_2026_CVPR,
  author    = {Ning, Aoxiang and Yu, Kailong and Xue, Minglong and Pan, Liyuan and He, Jinhong and Yan, Wenchao and Zhou, Mingliang and Wu, Yirui},
  title     = {Language-Guided One-Step Diffusion Model for Nighttime Flare Removal},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38442--38452},
}
Instance-level Visual Active Tracking with Occlusion-Aware Planning: Haowei Sun,

Kai Zhou,

Hao Gao,

Shiteng Zhang,

Jinwu Hu,

Xutao Wen,

Qixiang Ye,

Mingkui Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Haowei and Zhou, Kai and Gao, Hao and Zhang, Shiteng and Hu, Jinwu and Wen, Xutao and Ye, Qixiang and Tan, Mingkui},
  title     = {Instance-level Visual Active Tracking with Occlusion-Aware Planning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42494--42504},
}
HandX: Scaling Bimanual Motion and Interaction Generation: Zimu Zhang,

Yucheng Zhang,

Xiyan Xu,

Ziyin Wang,

Sirui Xu,

Kai Zhou,

Bing Zhou,

Chuan Guo,

Jian Wang,

Yu-Xiong Wang,

Liang-Yan Gui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zimu and Zhang, Yucheng and Xu, Xiyan and Wang, Ziyin and Xu, Sirui and Zhou, Kai and Zhou, Bing and Guo, Chuan and Wang, Jian and Wang, Yu-Xiong and Gui, Liang-Yan},
  title     = {{HandX}: Scaling Bimanual Motion and Interaction Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2274--2284},
}
GeoDiff4D: Geometry-Aware Diffusion for 4D Head Avatar Reconstruction: Chao Xu,

Xiaochen Zhao,

Xiang Deng,

Jingxiang Sun,

Donglin Di,

Zhuo Su,

Yebin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Chao and Zhao, Xiaochen and Deng, Xiang and Sun, Jingxiang and Di, Donglin and Su, Zhuo and Liu, Yebin},
  title     = {{GeoDiff4D}: Geometry-Aware Diffusion for {4D} Head Avatar Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32485--32495}
}
LLaVAShield: Safeguarding Multimodal Multi-Turn Dialogues in Vision-Language Models: Guolei Huang,

Qinzhi Peng,

Gan Xu,

Yao Huang,

Yuxuan Lu,

Yongjun Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Guolei and Peng, Qinzhi and Xu, Gan and Huang, Yao and Lu, Yuxuan and Shen, Yongjun},
  title     = {{LLaVAShield}: Safeguarding Multimodal Multi-Turn Dialogues in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30130--30140}
}
R2G: A Multi-View Circuit Graph Benchmark Suite from RTL to GDSII: Zewei Zhou,

Jiajun Zou,

Jiajia Zhang,

Ao Yang,

Ruichao He,

Haozheng Zhou,

Ao Liu,

Jiawei Liu,

Leilei Jin,

Shan Shen,

Daying Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Zewei and Zou, Jiajun and Zhang, Jiajia and Yang, Ao and He, Ruichao and Zhou, Haozheng and Liu, Ao and Liu, Jiawei and Jin, Leilei and Shen, Shan and Sun, Daying},
  title     = {{R2G}: A Multi-View Circuit Graph Benchmark Suite from {RTL} to {GDSII}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18827--18836}
}
Time-Specialized Event-Image Alignment for Blur-to-Video Decomposition: Zhijing Sun,

Senyan Xu,

Ruixuan Jiang,

Kean Liu,

Runze Tian,

Xueyang Fu,

Zheng-Jun Zha; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Zhijing and Xu, Senyan and Jiang, Ruixuan and Liu, Kean and Tian, Runze and Fu, Xueyang and Zha, Zheng-Jun},
  title     = {Time-Specialized Event-Image Alignment for Blur-to-Video Decomposition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8045--8055}
}
JUMP-Hand: Learning Joint-wise Uncertainty to Gate Mixture of View Experts for Multi-View 3D Hand Reconstruction: Haohong Kuang,

Yang Xiao,

Changlong Jiang,

Jinghong Zheng,

Hang Xu,

Ran Wang,

Zhiguo Cao,

Joey Tianyi Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Kuang_2026_CVPR,
  author    = {Kuang, Haohong and Xiao, Yang and Jiang, Changlong and Zheng, Jinghong and Xu, Hang and Wang, Ran and Cao, Zhiguo and Zhou, Joey Tianyi},
  title     = {{JUMP-Hand}: Learning Joint-wise Uncertainty to Gate Mixture of View Experts for Multi-View {3D} Hand Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28348--28357}
}
From Manuals to Actions: A Unified VLA Model for Chain-of-Thought Manual Generation and Robotic Manipulation: Chenyang Gu,

Jiaming Liu,

Hao Chen,

Runzhong Huang,

Qingpo Wuwu,

Xiaoqi Li,

Zhuoyang Liu,

Ying Li,

Renrui Zhang,

Peng Jia,

Pheng-Ann Heng,

Shanghang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Chenyang and Liu, Jiaming and Chen, Hao and Huang, Runzhong and Wuwu, Qingpo and Li, Xiaoqi and Liu, Zhuoyang and Li, Ying and Zhang, Renrui and Jia, Peng and Heng, Pheng-Ann and Zhang, Shanghang},
  title     = {From Manuals to Actions: A Unified {VLA} Model for Chain-of-Thought Manual Generation and Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13540--13552}
}
Anatomical Domain Shifts: Test-time Heterogeneous Adaptation for 3D Human Pose Prediction: Qiongjie Cui,

Pan Zhou,

Jingjing Chen,

Na Zhao; [pdf]
[bibtex]
@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Qiongjie and Zhou, Pan and Chen, Jingjing and Zhao, Na},
  title     = {Anatomical Domain Shifts: Test-time Heterogeneous Adaptation for {3D} Human Pose Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28369--28378}
}
Pointing at Parts: Training-Free Few-Shot Grounding in Multimodal LLMs: Shiang-Feng Tsai,

Yuan-Hong Liao,

Jin-Cheng Jhang,

Nan Qiao,

Min Sun; [pdf] [supp]
[bibtex]
@InProceedings{Tsai_2026_CVPR,
  author    = {Tsai, Shiang-Feng and Liao, Yuan-Hong and Jhang, Jin-Cheng and Qiao, Nan and Sun, Min},
  title     = {Pointing at Parts: Training-Free Few-Shot Grounding in Multimodal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33922--33932}
}
SubspaceAD: Training-Free Few-Shot Anomaly Detection via Subspace Modeling: Camile Lendering,

Erkut Akdag,

Egor Bondarau; [pdf] [supp]
[bibtex]
@InProceedings{Lendering_2026_CVPR,
  author    = {Lendering, Camile and Akdag, Erkut and Bondarau, Egor},
  title     = {{SubspaceAD}: Training-Free Few-Shot Anomaly Detection via Subspace Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28557--28566}
}
Evolving Contextual Safety in Multi-Modal Large Language Models via Inference-Time Self-Reflective Memory: Ce Zhang,

Jinxi He,

Junyi He,

Katia Sycara,

Yaqi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Ce and He, Jinxi and He, Junyi and Sycara, Katia and Xie, Yaqi},
  title     = {Evolving Contextual Safety in Multi-Modal Large Language Models via Inference-Time Self-Reflective Memory},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41182--41192}
}
FlashMesh: Faster and Better Autoregressive Mesh Synthesis via Structured Speculation: Tingrui Shen,

Yiheng Zhang,

Chen Tang,

Chuan Ping,

Zixing Zhao,

Le Wan,

Yuwang Wang,

Ronggang Wang,

Shengfeng He; [pdf] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Tingrui and Zhang, Yiheng and Tang, Chen and Ping, Chuan and Zhao, Zixing and Wan, Le and Wang, Yuwang and Wang, Ronggang and He, Shengfeng},
  title     = {{FlashMesh}: Faster and Better Autoregressive Mesh Synthesis via Structured Speculation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27052--27061}
}
SeeThrough3D: Occlusion Aware 3D Control in Text-to-Image Generation: Vaibhav Agrawal,

Rishubh Parihar,

Pradhaan S Bhat,

Ravi Kiran Sarvadevabhatla,

Venkatesh Babu Radhakrishnan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Agrawal_2026_CVPR,
  author    = {Agrawal, Vaibhav and Parihar, Rishubh and Bhat, Pradhaan S and Sarvadevabhatla, Ravi Kiran and Radhakrishnan, Venkatesh Babu},
  title     = {{SeeThrough3D}: Occlusion Aware {3D} Control in Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25403--25414}
}
Exemplar-Free Class Incremental Learning via Preserving Class-Discriminative Structure: Xin Zhang,

Liang Bai,

Guanchao Wang,

Xian Yang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xin and Bai, Liang and Wang, Guanchao and Yang, Xian},
  title     = {Exemplar-Free Class Incremental Learning via Preserving Class-Discriminative Structure},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17979--17988}
}
RARE: Learn to RAnk and REtrieve for Monocular 3D Object Detection: Hyeonjeong Park,

Peixi Xiong,

Xiaoqian Ruan,

Dian Jia,

Pei Yu,

Wei Tang; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Hyeonjeong and Xiong, Peixi and Ruan, Xiaoqian and Jia, Dian and Yu, Pei and Tang, Wei},
  title     = {{RARE}: Learn to {RAnk} and {REtrieve} for Monocular {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11556--11566}
}
Do You See What I Am Pointing At? Gesture-Based Egocentric Video Question Answering: Yura Choi,

Roy Miles,

Rolandos Alexandros Potamias,

Ismail Elezi,

Jiankang Deng,

Stefanos Zafeiriou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Yura and Miles, Roy and Potamias, Rolandos Alexandros and Elezi, Ismail and Deng, Jiankang and Zafeiriou, Stefanos},
  title     = {Do You See What {I} Am Pointing At? Gesture-Based Egocentric Video Question Answering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18534--18544}
}
3D Gaussian Splatting with Self-Constrained Priors for High Fidelity Surface Reconstruction: Takeshi Noda,

Yu-Shen Liu,

Zhizhong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Noda_2026_CVPR,
  author    = {Noda, Takeshi and Liu, Yu-Shen and Han, Zhizhong},
  title     = {{3D} {Gaussian} Splatting with Self-Constrained Priors for High Fidelity Surface Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26041--26051}
}
VideoSSR: Video Self-Supervised Reinforcement Learning: Zefeng He,

Xiaoye Qu,

Yafu Li,

Siyuan Huang,

Daizong Liu,

Yu Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Zefeng and Qu, Xiaoye and Li, Yafu and Huang, Siyuan and Liu, Daizong and Cheng, Yu},
  title     = {{VideoSSR}: Video Self-Supervised Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26530--26540}
}
Reasoning Diffusion for Unpaired Test Time Out-of-distribution Text-Image to Video Generation: Zirui Pan,

Xin Wang,

Yipeng Zhang,

Hong Chen,

Kecheng Zheng,

Wenwu Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Zirui and Wang, Xin and Zhang, Yipeng and Chen, Hong and Zheng, Kecheng and Zhu, Wenwu},
  title     = {Reasoning Diffusion for Unpaired Test Time Out-of-distribution Text-Image to Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {636--646}
}
Disentangling to Re-couple: Resolving the Similarity-Controllability Paradox in Subject-Driven Text-to-Image Generation: Shuang Li,

Chao Deng,

Hang Chen,

Liqun Liu,

Zhenyu Hu,

Te Cao,

Mengge Xue,

Yuan Chen,

Peng Shu,

Huan Yu,

Jie Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Shuang and Deng, Chao and Chen, Hang and Liu, Liqun and Hu, Zhenyu and Cao, Te and Xue, Mengge and Chen, Yuan and Shu, Peng and Yu, Huan and Jiang, Jie},
  title     = {Disentangling to Re-couple: Resolving the Similarity-Controllability Paradox in Subject-Driven Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7741--7751}
}
Generalizing Visual Geometry Priors to Sparse Gaussian Occupancy Prediction: Changqing Zhou,

Yueru Luo,

Changhao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Changqing and Luo, Yueru and Chen, Changhao},
  title     = {Generalizing Visual Geometry Priors to Sparse {Gaussian} Occupancy Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28578--28587}
}
ReWeaver: Towards Simulation-Ready and Topology-Accurate Garment Reconstruction: Ming Li,

Hui Shan,

Kai Zheng,

Chentao Shen,

Siyu Liu,

Yanwei Fu,

Zhen Chen,

Xiangru Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Ming and Shan, Hui and Zheng, Kai and Shen, Chentao and Liu, Siyu and Fu, Yanwei and Chen, Zhen and Huang, Xiangru},
  title     = {{ReWeaver}: Towards Simulation-Ready and Topology-Accurate Garment Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4122--4131}
}
Scan Clusters, Not Pixels: A Cluster-Centric Paradigm for Efficient Ultra-high-definition Image Restoration: Chen Wu,

Ling Wang,

Zhuoran Zheng,

Yuning Cui,

Zhixiong Yang,

Xiangyu Chen,

Yue Zhang,

Weidong Jiang,

Jingyuan Xia; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Chen and Wang, Ling and Zheng, Zhuoran and Cui, Yuning and Yang, Zhixiong and Chen, Xiangyu and Zhang, Yue and Jiang, Weidong and Xia, Jingyuan},
  title     = {Scan Clusters, Not Pixels: A Cluster-Centric Paradigm for Efficient Ultra-high-definition Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15528--15537}
}
SPARROW: Learning Spatial Precision and Temporal Referential Consistency in Pixel-Grounded Video MLLMs: Mohamad Alansari,

Naufal Suryanto,

Divya Velayudhan,

Sajid Javed,

Naoufel Werghi,

Muzammal Naseer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alansari_2026_CVPR,
  author    = {Alansari, Mohamad and Suryanto, Naufal and Velayudhan, Divya and Javed, Sajid and Werghi, Naoufel and Naseer, Muzammal},
  title     = {{SPARROW}: Learning Spatial Precision and Temporal Referential Consistency in Pixel-Grounded Video {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17545--17556}
}
PETAR: Localized Findings Generation with Mask-Aware Vision-Language Modeling for PET Automated Reporting: Danyal Maqbool,

Changhee Lee,

Zachary Huemann,

Samuel D. Church,

Matthew E. Larson,

Scott B. Perlman,

Tomas A. Romero,

Joshua D. Warner,

Meghan Lubner,

Xin Tie,

Jameson Merkow,

Junjie Hu,

Steve Y. Cho,

Tyler J. Bradshaw; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Maqbool_2026_CVPR,
  author    = {Maqbool, Danyal and Lee, Changhee and Huemann, Zachary and Church, Samuel D. and Larson, Matthew E. and Perlman, Scott B. and Romero, Tomas A. and Warner, Joshua D. and Lubner, Meghan and Tie, Xin and Merkow, Jameson and Hu, Junjie and Cho, Steve Y. and Bradshaw, Tyler J.},
  title     = {{PETAR}: Localized Findings Generation with Mask-Aware Vision-Language Modeling for {PET} Automated Reporting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42637--42648}
}
PROMPTMINER: Black-Box Prompt Stealing against Text-to-Image Generative Models via Reinforcement Learning and VLM-Guided Optimization: Mingzhe Li,

Renhao Zhang,

Zhiyang Wen,

Siqi Pan,

Bruno Castro da Silva,

Juan Zhai,

Shiqing Ma; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Mingzhe and Zhang, Renhao and Wen, Zhiyang and Pan, Siqi and da Silva, Bruno Castro and Zhai, Juan and Ma, Shiqing},
  title     = {{PROMPTMINER}: Black-Box Prompt Stealing against Text-to-Image Generative Models via Reinforcement Learning and {VLM}-Guided Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7795--7804}
}
Iterative Closed-Loop Motion Synthesis for Scaling the Capabilities of Humanoid Control: Weisheng Xu,

Qiwei Wu,

Jiaxi Zhang,

Jing Tan,

Yangfan Li,

Yuetong Fang,

Jiaqi Xiong,

Kai Wu,

Rong Ou,

Renjing Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Weisheng and Wu, Qiwei and Zhang, Jiaxi and Tan, Jing and Li, Yangfan and Fang, Yuetong and Xiong, Jiaqi and Wu, Kai and Ou, Rong and Xu, Renjing},
  title     = {Iterative Closed-Loop Motion Synthesis for Scaling the Capabilities of Humanoid Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16398--16407}
}
Editprint: General Digital Image Forensics via Editing Fingerprint with Self-Augmentation Training: Haiwei Wu,

Kemou Li,

Yuanman Li,

Jiantao Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Haiwei and Li, Kemou and Li, Yuanman and Zhou, Jiantao},
  title     = {Editprint: General Digital Image Forensics via Editing Fingerprint with Self-Augmentation Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35483--35493}
}
When Lines Meet Textures: Spatial-Frequency Aligned Diffusion Features for Cross-Sparsity Correspondence: Mingrui Zhu,

Fengzhi Wang,

Xin Wei,

Jun Wang,

Nannan Wang,

Xinbo Gao; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Mingrui and Wang, Fengzhi and Wei, Xin and Wang, Jun and Wang, Nannan and Gao, Xinbo},
  title     = {When Lines Meet Textures: Spatial-Frequency Aligned Diffusion Features for Cross-Sparsity Correspondence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37715--37724}
}
Routing on Demand: DSNet for Efficient Progressive Point Cloud Denoising: Xiaoqian Cheng,

Dong Xiao,

Husen Li,

Zheng Liu,

Renjie Chen; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Xiaoqian and Xiao, Dong and Li, Husen and Liu, Zheng and Chen, Renjie},
  title     = {Routing on Demand: {DSNet} for Efficient Progressive Point Cloud Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39111--39120}
}
ElasticFormer: Detecting Objects in HRW Shots via Elastic Computing Vision Transformer: Wenxi Li,

Jingchen Huang,

Chenyang Lyu,

Moran Liu,

Haozhe Lin,

Guiguang Ding,

Yuchen Guo; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Wenxi and Huang, Jingchen and Lyu, Chenyang and Liu, Moran and Lin, Haozhe and Ding, Guiguang and Guo, Yuchen},
  title     = {{ElasticFormer}: Detecting Objects in {HRW} Shots via Elastic Computing Vision Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32735--32744}
}
S2D: Selective Spectral Decay for Quantization-Friendly Conditioning of Neural Activations: Arnav Chavan,

Nahush Lele,

Udbhav Bamba,

Sankalp Dayal,

Aditi Raghunathan,

Deepak Gupta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chavan_2026_CVPR,
  author    = {Chavan, Arnav and Lele, Nahush and Bamba, Udbhav and Dayal, Sankalp and Raghunathan, Aditi and Gupta, Deepak},
  title     = {{S2D}: Selective Spectral Decay for Quantization-Friendly Conditioning of Neural Activations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12074--12083}
}
Pushing the Frontier of Audiovisual Perception with Large-Scale Multimodal Correspondence Learning: Apoorv Vyas,

Heng-Jui Chang,

Cheng-Fu Yang,

Po-Yao Huang,

Luya Gao,

Julius Richter,

Sanyuan Chen,

Matthew Le,

Piotr Dollár,

Christoph Feichtenhofer,

Ann Lee,

Wei-Ning Hsu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vyas_2026_CVPR,
  author    = {Vyas, Apoorv and Chang, Heng-Jui and Yang, Cheng-Fu and Huang, Po-Yao and Gao, Luya and Richter, Julius and Chen, Sanyuan and Le, Matthew and Doll{\'a}r, Piotr and Feichtenhofer, Christoph and Lee, Ann and Hsu, Wei-Ning},
  title     = {Pushing the Frontier of Audiovisual Perception with Large-Scale Multimodal Correspondence Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30172--30182}
}
PhysGaia: A Physics-aware Benchmark with Multi-Body Interactions for Dynamic Novel View Synthesis: Mijeong Kim,

Gunhee Kim,

Jungyoon Choi,

Wonjae Roh,

Bohyung Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Mijeong and Kim, Gunhee and Choi, Jungyoon and Roh, Wonjae and Han, Bohyung},
  title     = {{PhysGaia}: A Physics-aware Benchmark with Multi-Body Interactions for Dynamic Novel View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22604--22614}
}
NS-Diff: Fluid Navier-Stokes Guided Video Diffusion via Reinforcement Learning: Zijun Deng,

Yuxin Peng; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Zijun and Peng, Yuxin},
  title     = {{NS-Diff}: Fluid {Navier-Stokes} Guided Video Diffusion via Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43218--43227}
}
CaliTex: Geometry-Calibrated Attention for View-Coherent 3D Texture Generation: Chenyu Liu,

Hongze Chen,

Jingzhi Bao,

Lingting Zhu,

Runze Zhang,

Weikai Chen,

Zeyu Hu,

Yingda Yin,

Keyang Luo,

Xin Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Chenyu and Chen, Hongze and Bao, Jingzhi and Zhu, Lingting and Zhang, Runze and Chen, Weikai and Hu, Zeyu and Yin, Yingda and Luo, Keyang and Wang, Xin},
  title     = {{CaliTex}: Geometry-Calibrated Attention for View-Coherent {3D} Texture Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5923--5933}
}
DynamicTree: Interactive Real Tree Animation via Sparse Voxel Spectrum: Yaokun Li,

Lihe Ding,

Xiao Chen,

Guang Tan,

Tianfan Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yaokun and Ding, Lihe and Chen, Xiao and Tan, Guang and Xue, Tianfan},
  title     = {{DynamicTree}: Interactive Real Tree Animation via Sparse Voxel Spectrum},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1241--1251}
}
Ghost-FWL: A Large-Scale Full-Waveform LiDAR Dataset for Ghost Detection and Removal: Kazuma Ikeda,

Ryosei Hara,

Rokuto Nagata,

Ozora Sako,

Zihao Ding,

Takahiro Kado,

Ibuki Fujioka,

Taro Beppu,

Mariko Isogawa,

Kentaro Yoshioka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ikeda_2026_CVPR,
  author    = {Ikeda, Kazuma and Hara, Ryosei and Nagata, Rokuto and Sako, Ozora and Ding, Zihao and Kado, Takahiro and Fujioka, Ibuki and Beppu, Taro and Isogawa, Mariko and Yoshioka, Kentaro},
  title     = {{Ghost-FWL}: A Large-Scale Full-Waveform {LiDAR} Dataset for Ghost Detection and Removal},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17164--17173}
}
Yo'City: Personalized and Boundless 3D Realistic City Scene Generation via Self-Critic Expansion: Keyang Lu,

Sifan Zhou,

Hongbin Xu,

Gang Xu,

Zhifei Yang,

Yikai Wang,

Zhen Xiao,

Jieyi Long,

Ming Li; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Keyang and Zhou, Sifan and Xu, Hongbin and Xu, Gang and Yang, Zhifei and Wang, Yikai and Xiao, Zhen and Long, Jieyi and Li, Ming},
  title     = {{Yo'City}: Personalized and Boundless {3D} Realistic City Scene Generation via Self-Critic Expansion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3219--3230}
}
LVLM-Aided Alignment of Task-Specific Vision Models: Alexander Koebler,

Lukas Kuhn,

Ingo Thon,

Florian Buettner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koebler_2026_CVPR,
  author    = {Koebler, Alexander and Kuhn, Lukas and Thon, Ingo and Buettner, Florian},
  title     = {{LVLM}-Aided Alignment of Task-Specific Vision Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7837--7846}
}
Detecting Unknown Objects via Energy-based Separation for Open World Object Detection: Jun-Woo Heo,

Keonhee Park,

Gyeong-Moon Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Heo_2026_CVPR,
  author    = {Heo, Jun-Woo and Park, Keonhee and Park, Gyeong-Moon},
  title     = {Detecting Unknown Objects via Energy-based Separation for Open World Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27558--27567}
}
EpiAgent: An Agent-Centric System for Ancient Inscription Restoration: Shipeng Zhu,

Ang Chen,

Na Nie,

Pengfei Fang,

Min-Ling Zhang,

Hui Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Shipeng and Chen, Ang and Nie, Na and Fang, Pengfei and Zhang, Min-Ling and Xue, Hui},
  title     = {{EpiAgent}: An Agent-Centric System for Ancient Inscription Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39304--39313}
}
Unified Latent Space for Understanding and Generation via Semantic Auto-encoder: Xiaojie Li,

Yang Zhao,

Ming Li,

Yancheng Zhang,

Zonglin Lyu,

Yunpeng Chen,

Rui Wang,

Daquan Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xiaojie and Zhao, Yang and Li, Ming and Zhang, Yancheng and Lyu, Zonglin and Chen, Yunpeng and Wang, Rui and Zhou, Daquan},
  title     = {Unified Latent Space for Understanding and Generation via Semantic Auto-encoder},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2115--2124}
}
Diffusion-Based sRGB Real Noise Generation via Prompt-Driven Noise Representation Learning: Jaekyun Ko,

Dongjin Kim,

Soomin Lee,

Guanghui Wang,

Tae Hyun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ko_2026_CVPR,
  author    = {Ko, Jaekyun and Kim, Dongjin and Lee, Soomin and Wang, Guanghui and Kim, Tae Hyun},
  title     = {Diffusion-Based {sRGB} Real Noise Generation via Prompt-Driven Noise Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35956--35966}
}
DiGraphHal-Bench: Evaluating Multimodal Large Language Models on Complex Directed Graphs: Yixin Fan,

Zhao He,

Yuxin Hou,

Changhua Zhou,

Zihao Liu,

Peng Wang,

Chenglong Lu,

Xu Zhang,

Wei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Yixin and He, Zhao and Hou, Yuxin and Zhou, Changhua and Liu, Zihao and Wang, Peng and Lu, Chenglong and Zhang, Xu and Wang, Wei},
  title     = {{DiGraphHal-Bench}: Evaluating Multimodal Large Language Models on Complex Directed Graphs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30885--30894}
}
Talking Together: Synthesizing Co-Located 3D Conversations from Audio: Mengyi Shan,

Shouchieh Chang,

Ziqian Bai,

Shichen Liu,

Yinda Zhang,

Luchuan Song,

Rohit Pandey,

Sean Fanello,

Zeng Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shan_2026_CVPR,
  author    = {Shan, Mengyi and Chang, Shouchieh and Bai, Ziqian and Liu, Shichen and Zhang, Yinda and Song, Luchuan and Pandey, Rohit and Fanello, Sean and Huang, Zeng},
  title     = {Talking Together: Synthesizing Co-Located {3D} Conversations from Audio},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3965--3977}
}
MERG3R: A Divide-and-Conquer Approach to Large-Scale Neural Visual Geometry: Leo Kaixuan Cheng,

Abdus Shaikh,

Ruofan Liang,

Zhijie Wu,

Yushi Guan,

Nandita Vijaykumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Leo Kaixuan and Shaikh, Abdus and Liang, Ruofan and Wu, Zhijie and Guan, Yushi and Vijaykumar, Nandita},
  title     = {{MERG3R}: A Divide-and-Conquer Approach to Large-Scale Neural Visual Geometry},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28969--28978}
}
Alert-CLIP: Abnormality-aware Latent-Enhanced Representation Tuning of CLIP for Video Anomaly Detection: Yiyan Zhu,

Menghao Zhang,

Haifeng Sun,

Pengfei Ren,

Xianao Chu,

Chenye Xu,

Hong Tan,

Jinghan Wang,

Qi Qi,

Jingyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yiyan and Zhang, Menghao and Sun, Haifeng and Ren, Pengfei and Chu, Xianao and Xu, Chenye and Tan, Hong and Wang, Jinghan and Qi, Qi and Wang, Jingyu},
  title     = {{Alert-CLIP}: Abnormality-aware Latent-Enhanced Representation Tuning of {CLIP} for Video Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35545--35554}
}
Dropping Anchor and Spherical Harmonics for Sparse-view Gaussian Splatting: Shuangkang Fang,

I-Chao Shen,

Xuanyang Zhang,

Zesheng Wang,

Yufeng Wang,

Wenrui Ding,

Gang YU,

Takeo Igarashi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Shuangkang and Shen, I-Chao and Zhang, Xuanyang and Wang, Zesheng and Wang, Yufeng and Ding, Wenrui and YU, Gang and Igarashi, Takeo},
  title     = {Dropping Anchor and Spherical Harmonics for Sparse-view {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33312--33322}
}
Neural-Centric Video Processing Pipeline for Unified Multi-Task Inference: Seyeon Lee,

Juncheol Ye,

Jaehong Kim,

Dongsu Han; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Seyeon and Ye, Juncheol and Kim, Jaehong and Han, Dongsu},
  title     = {Neural-Centric Video Processing Pipeline for Unified Multi-Task Inference},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18555--18564}
}
MICo-150K: A Comprehensive Dataset Advancing Multi-Image Composition: Xinyu Wei,

Kangrui Cen,

Hongyang Wei,

Zhen Guo,

Bairui Li,

Zeqing Wang,

Jinrui Zhang,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Xinyu and Cen, Kangrui and Wei, Hongyang and Guo, Zhen and Li, Bairui and Wang, Zeqing and Zhang, Jinrui and Zhang, Lei},
  title     = {{MICo-150K}: A Comprehensive Dataset Advancing Multi-Image Composition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29695--29706}
}
Scalable Multi-View Subspace Clustering with Tensorized Anchor Guidance: Miao Jia,

Xingchen Hu,

Jiyuan Liu,

Siwei Wang,

Min Wang,

Zijian Chen; [pdf] [supp]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Miao and Hu, Xingchen and Liu, Jiyuan and Wang, Siwei and Wang, Min and Chen, Zijian},
  title     = {Scalable Multi-View Subspace Clustering with Tensorized Anchor Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14367--14376}
}
MedTVT-R1: A Multimodal LLM Empowering Medical Reasoning and Diagnosis: Yuting Zhang,

Kaishen Yuan,

Hao Lu,

Yutao Yue,

Jintai Chen,

Kaishun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yuting and Yuan, Kaishen and Lu, Hao and Yue, Yutao and Chen, Jintai and Wu, Kaishun},
  title     = {{MedTVT-R1}: A Multimodal {LLM} Empowering Medical Reasoning and Diagnosis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35248--35259}
}
Good Can Sometimes be Bad: A Unified Attack against 3D Point Cloud Classifier by a Flexible Isotropic Resampling: Linkun Fan,

Jiahao Zhang,

Juntao Zhang,

Lei Zhang,

Fazhi He,

Daojun Han; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Linkun and Zhang, Jiahao and Zhang, Juntao and Zhang, Lei and He, Fazhi and Han, Daojun},
  title     = {Good Can Sometimes be Bad: A Unified Attack against {3D} Point Cloud Classifier by a Flexible Isotropic Resampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42246--42256}
}
CryoHype: Reconstructing a thousand cryo-EM structures with transformer-based hypernetworks: Jeffrey Gu,

Minkyu Jeon,

Ambri Ma,

Serena Yeung-Levy,

Ellen D. Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Jeffrey and Jeon, Minkyu and Ma, Ambri and Yeung-Levy, Serena and Zhong, Ellen D.},
  title     = {{CryoHype}: Reconstructing a thousand cryo-{EM} structures with transformer-based hypernetworks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35280--35290}
}
Image-Guided Geometric Stylization of 3D Meshes: Changwoon Choi,

Hyunsoo Lee,

Clément Jambon,

Yael Vinker,

Young Min Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Changwoon and Lee, Hyunsoo and Jambon, Cl{\'e}ment and Vinker, Yael and Kim, Young Min},
  title     = {Image-Guided Geometric Stylization of {3D} Meshes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19972--19981}
}
PHANTOM: Physics-Infused Video Generation via Joint Modeling of Visual and Latent Physical Dynamics: Ying Shen,

Jerry Xiong,

Tianjiao Yu,

Ismini Lourentzou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Ying and Xiong, Jerry and Yu, Tianjiao and Lourentzou, Ismini},
  title     = {{PHANTOM}: Physics-Infused Video Generation via Joint Modeling of Visual and Latent Physical Dynamics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11185--11194}
}
Learning Multi-View Spatial Reasoning from Cross-View Relations: Suchae Jeong,

Jaehwi Song,

Haeone Lee,

Hanna Kim,

Jian Kim,

Dongjun Lee,

Dong Kyu Shin,

Changyeon Kim,

Dongyoon Hahm,

Woogyeol Jin,

Juheon Choi,

Kimin Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Suchae and Song, Jaehwi and Lee, Haeone and Kim, Hanna and Kim, Jian and Lee, Dongjun and Shin, Dong Kyu and Kim, Changyeon and Hahm, Dongyoon and Jin, Woogyeol and Choi, Juheon and Lee, Kimin},
  title     = {Learning Multi-View Spatial Reasoning from Cross-View Relations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2570--2581}
}
UCAN: Unified Convolutional Attention Network for Expansive Receptive Fields in Lightweight Super-Resolution: Cao Thien Tan,

Phan Thi Thu Trang,

Do Nghiem Duc,

Ho Ngoc Anh,

Hanyang Zhuang,

Nguyen Duc Dung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Cao Thien and Trang, Phan Thi Thu and Duc, Do Nghiem and Anh, Ho Ngoc and Zhuang, Hanyang and Dung, Nguyen Duc},
  title     = {{UCAN}: Unified Convolutional Attention Network for Expansive Receptive Fields in Lightweight Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23409--23418}
}
Splat-Based Metal Artifact Reduction in Cone-Beam CT via Compact Attenuation Modeling: Kiseok Choi,

Jaemin Cho,

Inchul Kim,

Min H. Kim; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Kiseok and Cho, Jaemin and Kim, Inchul and Kim, Min H.},
  title     = {Splat-Based Metal Artifact Reduction in Cone-Beam {CT} via Compact Attenuation Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26751--26760}
}
Measuring the (Un)Faithfulness of Concept-Based Explanations: Shubham Kumar,

Narendra Ahuja; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kumar_2026_CVPR,
  author    = {Kumar, Shubham and Ahuja, Narendra},
  title     = {Measuring the (Un)Faithfulness of Concept-Based Explanations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38990--39000}
}
Preserving Source Video Realism: High-Fidelity Face Swapping for Cinematic Quality: Zekai Luo,

Zongze Du,

Zhouhang Zhu,

Hao Zhong,

Muzhi Zhu,

Wen Wang,

Yuling Xi,

Chenchen Jing,

Hao Chen,

Chunhua Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Zekai and Du, Zongze and Zhu, Zhouhang and Zhong, Hao and Zhu, Muzhi and Wang, Wen and Xi, Yuling and Jing, Chenchen and Chen, Hao and Shen, Chunhua},
  title     = {Preserving Source Video Realism: High-Fidelity Face Swapping for Cinematic Quality},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40653--40663}
}
TRANSPORTER: Transferring Visual Semantics from VLM Manifolds: Alexandros Stergiou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Stergiou_2026_CVPR,
  author    = {Stergiou, Alexandros},
  title     = {{TRANSPORTER}: Transferring Visual Semantics from {VLM} Manifolds},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24129--24140}
}
BALM: A Model-Agnostic Framework for Balanced Multimodal Learning under Imbalanced Missing Rates: Phuong-Anh Nguyen,

Tien Anh Pham,

Duc-Trong Le,

Cam-Van Thi Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Phuong-Anh and Pham, Tien Anh and Le, Duc-Trong and Nguyen, Cam-Van Thi},
  title     = {{BALM}: A Model-Agnostic Framework for Balanced Multimodal Learning under Imbalanced Missing Rates},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30246--30256}
}
PerformRecast: Expression and Head Pose Disentanglement for Portrait Video Editing: Jiadong Liang,

Bojun Xiong,

Jie Tian,

Hua Li,

Xiao Long,

Yong Zheng,

Huan Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Jiadong and Xiong, Bojun and Tian, Jie and Li, Hua and Long, Xiao and Zheng, Yong and Fu, Huan},
  title     = {{PerformRecast}: Expression and Head Pose Disentanglement for Portrait Video Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25131--25141}
}
4D Primitive-Mache: Glueing Primitives for Persistent 4D Scene Reconstruction: Kirill Mazur,

Marwan Taher,

Andrew J. Davison; [pdf] [supp]
[bibtex]
@InProceedings{Mazur_2026_CVPR,
  author    = {Mazur, Kirill and Taher, Marwan and Davison, Andrew J.},
  title     = {{4D Primitive-Mache}: Glueing Primitives for Persistent {4D} Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7372--7381}
}
Zero-Shot Reconstruction of Animatable 3D Avatars with Cloth Dynamics from a Single Image: Joohyun Kwon,

Geonhee Sim,

Gyeongsik Moon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kwon_2026_CVPR,
  author    = {Kwon, Joohyun and Sim, Geonhee and Moon, Gyeongsik},
  title     = {Zero-Shot Reconstruction of Animatable {3D} Avatars with Cloth Dynamics from a Single Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18182--18192}
}
HandDreamer: Zero-Shot Text to 3D Hand Model Generation using Corrective Hand Shape Guidance: Green Rosh,

Prateek Kukreja,

Vishakha SR,

Pawan Prasad B H; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rosh_2026_CVPR,
  author    = {Rosh, Green and Kukreja, Prateek and Vishakha, SR and H, Pawan Prasad B},
  title     = {{HandDreamer}: Zero-Shot Text to {3D} Hand Model Generation using Corrective Hand Shape Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8846--8856}
}
Learning Latent Proxies for Controllable Single-Image Relighting: Haoze Zheng,

Zihao Wang,

Xianfeng Wu,

Yajing Bai,

Yexin Liu,

Yun Li,

Xiaogang Xu,

Harry Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Haoze and Wang, Zihao and Wu, Xianfeng and Bai, Yajing and Liu, Yexin and Li, Yun and Xu, Xiaogang and Yang, Harry},
  title     = {Learning Latent Proxies for Controllable Single-Image Relighting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27303--27312}
}
Edit-aware RAW reconstruction: Abhijith Punnappurath,

Luxi Zhao,

Ke Zhao,

Hue Nguyen,

Radek Grzeszczuk,

Michael S. Brown; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Punnappurath_2026_CVPR,
  author    = {Punnappurath, Abhijith and Zhao, Luxi and Zhao, Ke and Nguyen, Hue and Grzeszczuk, Radek and Brown, Michael S.},
  title     = {Edit-aware {RAW} reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8418--8427}
}
View-Aware Semantic Alignment for Aerial-Ground Person Re-Identification: Quan Zhang,

Zeqiang Cai,

Peiming Zhao,

Jingze Wu,

Cailun Wu,

Hongbo Chen,

Jianhuang Lai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Quan and Cai, Zeqiang and Zhao, Peiming and Wu, Jingze and Wu, Cailun and Chen, Hongbo and Lai, Jianhuang},
  title     = {View-Aware Semantic Alignment for Aerial-Ground Person Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4383--4392}
}
ChangeBridge: Spatiotemporal Image Generation with Multimodal Controls for Remote Sensing: Zhenghui Zhao,

Chen Wu,

Xiangyong Cao,

Di Wang,

Hongruixuan Chen,

Datao Tang,

Liangpei Zhang,

Zhuo Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Zhenghui and Wu, Chen and Cao, Xiangyong and Wang, Di and Chen, Hongruixuan and Tang, Datao and Zhang, Liangpei and Zheng, Zhuo},
  title     = {{ChangeBridge}: Spatiotemporal Image Generation with Multimodal Controls for Remote Sensing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27761--27771}
}
PhysGM: Large Physical Gaussian Model for Feed-Forward 4D Synthesis: Chunji Lv,

Zequn Chen,

Donglin Di,

Weinan Zhang,

Hao Li,

Chen Wei,

Yinjie Lei,

Changsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2026_CVPR,
  author    = {Lv, Chunji and Chen, Zequn and Di, Donglin and Zhang, Weinan and Li, Hao and Wei, Chen and Lei, Yinjie and Li, Changsheng},
  title     = {{PhysGM}: Large Physical {Gaussian} Model for Feed-Forward {4D} Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29855--29865}
}
EmoStyle: Emotion-Driven Image Stylization: Jingyuan Yang,

Zihuan Bai,

Hui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Jingyuan and Bai, Zihuan and Huang, Hui},
  title     = {{EmoStyle}: Emotion-Driven Image Stylization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {594--603}
}
AcTTA: Rethinking Test-Time Adaptation via Dynamic Activation: Hyeongyu Kim,

Geonhui Han,

Dosik Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Hyeongyu and Han, Geonhui and Hwang, Dosik},
  title     = {{AcTTA}: Rethinking Test-Time Adaptation via Dynamic Activation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22217--22226}
}
Prospective Dynamic 3D MRI Reconstruction via Latent-Space Motion Tracking from Single Measurement: Lixuan Chen,

Zhongnan Liu,

Jesse Hamilton,

James M. Balter,

Jeong Joon Park,

Liyue Shen; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Lixuan and Liu, Zhongnan and Hamilton, Jesse and Balter, James M. and Park, Jeong Joon and Shen, Liyue},
  title     = {Prospective Dynamic {3D} {MRI} Reconstruction via Latent-Space Motion Tracking from Single Measurement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5627--5636}
}
STARFlow-V: End-to-End Video Generative Modeling with Autoregressive Normalizing Flows: Jiatao Gu,

Ying Shen,

Tianrong Chen,

Laurent Dinh,

Yuyang Wang,

Miguel Angel Bautista,

David Berthelot,

Josh Susskind,

Shuangfei Zhai; [pdf] [supp]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Jiatao and Shen, Ying and Chen, Tianrong and Dinh, Laurent and Wang, Yuyang and Bautista, Miguel Angel and Berthelot, David and Susskind, Josh and Zhai, Shuangfei},
  title     = {{STARFlow-V}: End-to-End Video Generative Modeling with Autoregressive Normalizing Flows},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9084--9094}
}
High-Fidelity Virtual Try-On beyond Paired Data Scarcity via Diffusion-based Cycle-Consistent Learning: Jia Wu,

Yijing Dai,

Tingfeng Cao,

Meiling Wu,

Tao Luo,

Jian Dong Zhang,

Guangming Lu,

Xiaoyi Zeng; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Jia and Dai, Yijing and Cao, Tingfeng and Wu, Meiling and Luo, Tao and Zhang, Jian Dong and Lu, Guangming and Zeng, Xiaoyi},
  title     = {High-Fidelity Virtual Try-On beyond Paired Data Scarcity via Diffusion-based Cycle-Consistent Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35821--35830}
}
Self-guided Semantic Inspection for Zero-Shot Composed Image Retrieval: Jingjing Zhang,

Lei Zhang,

Zheren Fu,

Bo Hu,

Zhendong Mao; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jingjing and Zhang, Lei and Fu, Zheren and Hu, Bo and Mao, Zhendong},
  title     = {Self-guided Semantic Inspection for Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33881--33890}
}
Vision-Language Attribute Disentanglement and Reinforcement for Lifelong Person Re-Identification: Kunlun Xu,

Haotong Cheng,

Jiangmeng Li,

Xu Zou,

Jiahuan Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Kunlun and Cheng, Haotong and Li, Jiangmeng and Zou, Xu and Zhou, Jiahuan},
  title     = {Vision-Language Attribute Disentanglement and Reinforcement for Lifelong Person Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40397--40406}
}
SAGA: Source Attribution of Generative AI Videos: Rohit Kundu,

Vishal Mohanty,

Hao Xiong,

Shan Jia,

Athula Balachandran,

Amit K. Roy-Chowdhury; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kundu_2026_CVPR,
  author    = {Kundu, Rohit and Mohanty, Vishal and Xiong, Hao and Jia, Shan and Balachandran, Athula and Roy-Chowdhury, Amit K.},
  title     = {{SAGA}: Source Attribution of Generative {AI} Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21273--21283}
}
Dance Across Shifts: Forward-Facilitation Continual Test-Time Adaptation through Dynamic Style Bridging: Zhilin Zhu,

Yabin Wang,

Zhiheng Ma,

Yaguang Song,

Yaowei Wang,

Xiaopeng Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Zhilin and Wang, Yabin and Ma, Zhiheng and Song, Yaguang and Wang, Yaowei and Hong, Xiaopeng},
  title     = {Dance Across Shifts: Forward-Facilitation Continual Test-Time Adaptation through Dynamic Style Bridging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32322--32333}
}
Enhancing Continual Learning of Vision-Language Models via Dynamic Prefix Weighting: Hyeonseo Jang,

Hyuk Kwon,

Kibok Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2026_CVPR,
  author    = {Jang, Hyeonseo and Kwon, Hyuk and Lee, Kibok},
  title     = {Enhancing Continual Learning of Vision-Language Models via Dynamic Prefix Weighting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18042--18052}
}
Temporal Inversion for Learning Interval Change in Chest X-Rays: Hanbin Ko,

Kyeongmin Jeon,

Doowoong Choi,

Chang Min Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ko_2026_CVPR,
  author    = {Ko, Hanbin and Jeon, Kyeongmin and Choi, Doowoong and Park, Chang Min},
  title     = {Temporal Inversion for Learning Interval Change in Chest {X-Rays}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28338--28347}
}
PixARMesh: Autoregressive Mesh-Native Single-View Scene Reconstruction: Xiang Zhang,

Sohyun Yoo,

Hongrui Wu,

Chuan Li,

Jianwen Xie,

Zhuowen Tu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xiang and Yoo, Sohyun and Wu, Hongrui and Li, Chuan and Xie, Jianwen and Tu, Zhuowen},
  title     = {{PixARMesh}: Autoregressive Mesh-Native Single-View Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5881--5891}
}
PS-SR: Pseudo-Single-Step Video Super-Resolution via Speculative Diffusion: Aiqiu Wu,

Zhaofan Qiu,

Ting Yao,

Tao Mei; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Aiqiu and Qiu, Zhaofan and Yao, Ting and Mei, Tao},
  title     = {{PS-SR}: Pseudo-Single-Step Video Super-Resolution via Speculative Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38218--38227}
}
ARES: Unifying Asymmetric RGB-Event Stereo for Probabilistic Scene Flow Estimation: Jie Long Lee,

Gim Hee Lee; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Jie Long and Lee, Gim Hee},
  title     = {{ARES}: Unifying Asymmetric {RGB}-Event Stereo for Probabilistic Scene Flow Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37022--37031}
}
SelfHVD: Self-Supervised Handheld Video Deblurring: Honglei Xu,

Zhilu Zhang,

Junjie Fan,

Xiaohe Wu,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Honglei and Zhang, Zhilu and Fan, Junjie and Wu, Xiaohe and Zuo, Wangmeng},
  title     = {{SelfHVD}: Self-Supervised Handheld Video Deblurring},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37486--37495}
}
RFDM: Residual Flow Diffusion Models for Video Editing: Mohammadreza Salehi,

Mehdi Noroozi,

Luca Morreale,

Ruchika Chavhan,

Malcolm Chadwick,

Alberto Gil Couto Pimentel Ramos,

Abhinav Mehrotra; [pdf] [supp]
[bibtex]
@InProceedings{Salehi_2026_CVPR,
  author    = {Salehi, Mohammadreza and Noroozi, Mehdi and Morreale, Luca and Chavhan, Ruchika and Chadwick, Malcolm and Gil Couto Pimentel Ramos, Alberto and Mehrotra, Abhinav},
  title     = {{RFDM}: Residual Flow Diffusion Models for Video Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43514--43524}
}
The Power of Decaying Steps: Enhancing Attack Stability and Transferability for Sign-based Optimizers: Wei Tao,

Yang Dai,

Jincai Huang,

Qing Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2026_CVPR,
  author    = {Tao, Wei and Dai, Yang and Huang, Jincai and Tao, Qing},
  title     = {The Power of Decaying Steps: Enhancing Attack Stability and Transferability for Sign-based Optimizers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42300--42309}
}
Multi-Scale Gradient-Guided Unrolling Architecture with Adaptive Mamba for Compressive Sensing: Le Yang,

Hongping Gan; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Le and Gan, Hongping},
  title     = {Multi-Scale Gradient-Guided Unrolling Architecture with Adaptive {Mamba} for Compressive Sensing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41794--41803}
}
OmniVGGT: Omni-Modality Driven Visual Geometry Grounded Transformer: Haosong Peng,

Hao Li,

Yalun Dai,

Yushi Lan,

Yihang Luo,

Tianyu Qi,

Zhengshen Zhang,

Yufeng Zhan,

Junfei Zhang,

Wenchao Xu,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Haosong and Li, Hao and Dai, Yalun and Lan, Yushi and Luo, Yihang and Qi, Tianyu and Zhang, Zhengshen and Zhan, Yufeng and Zhang, Junfei and Xu, Wenchao and Liu, Ziwei},
  title     = {{OmniVGGT}: Omni-Modality Driven Visual Geometry Grounded Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36485--36497}
}
Bayesian Decomposition and Semantic Completion for Few-shot Semantic Segmentation: Guangchen Shi,

Yirui Wu,

Wei Zhu,

Tao Wang,

Hao Zhang,

Bo Li,

Tong Lu; [pdf]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Guangchen and Wu, Yirui and Zhu, Wei and Wang, Tao and Zhang, Hao and Li, Bo and Lu, Tong},
  title     = {{Bayesian} Decomposition and Semantic Completion for Few-shot Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12354--12363}
}
Natural Human Motion Recovery by Aligning High-Order Temporal Dynamics from Monocular Videos: Dingkun Wei,

Zehong Shen,

Yan Xia,

Georgios Pavlakos,

Yujun Shen,

Xiaowei Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Dingkun and Shen, Zehong and Xia, Yan and Pavlakos, Georgios and Shen, Yujun and Zhou, Xiaowei},
  title     = {Natural Human Motion Recovery by Aligning High-Order Temporal Dynamics from Monocular Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7187--7196}
}
Ego: Embedding-Guided Personalization of Vision-Language Models: Soroush Seifi,

Simon Gardier,

Vaggelis Dorovatas,

Daniel Olmeda Reino,

Rahaf Aljundi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seifi_2026_CVPR,
  author    = {Seifi, Soroush and Gardier, Simon and Dorovatas, Vaggelis and Reino, Daniel Olmeda and Aljundi, Rahaf},
  title     = {Ego: Embedding-Guided Personalization of Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11674--11683}
}
Think, Then Verify: A Hypothesis-Verification Multi-Agent Framework for Long Video Understanding: Zheng Wang,

Haoran Chen,

Haoxuan Qin,

Zhipeng Wei,

Tianwen Qian,

Cong Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zheng and Chen, Haoran and Qin, Haoxuan and Wei, Zhipeng and Qian, Tianwen and Bai, Cong},
  title     = {Think, Then Verify: A Hypothesis-Verification Multi-Agent Framework for Long Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33784--33793}
}
MoVie: Broaden Your Views with Human Motion for Action Detection: Di Yang,

Mahmoud Ali,

Xuanlong Yu,

Xi Shen,

Quan Kong,

Gianpiero Francesca,

François Brémond; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Di and Ali, Mahmoud and Yu, Xuanlong and Shen, Xi and Kong, Quan and Francesca, Gianpiero and Br{\'e}mond, Fran{\c{c}}ois},
  title     = {{MoVie}: Broaden Your Views with Human Motion for Action Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27313--27323}
}
CodeDance: A Dynamic Tool-integrated MLLM for Executable Visual Reasoning: Qi Song,

Honglin Li,

Yingchen Yu,

Haoyi Zhou,

Lin Yang,

Song Bai,

Qi She,

Zilong Huang,

Yunqing Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Qi and Li, Honglin and Yu, Yingchen and Zhou, Haoyi and Yang, Lin and Bai, Song and She, Qi and Huang, Zilong and Zhao, Yunqing},
  title     = {{CodeDance}: A Dynamic Tool-integrated {MLLM} for Executable Visual Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19186--19195}
}
SpatialTree: How Spatial Intelligence Branches Out in MLLMs: Yuxi Xiao,

Longfei Li,

Shen Yan,

Xinhang Liu,

Sida Peng,

Yunchao Wei,

Xiaowei Zhou,

Bingyi Kang; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Yuxi and Li, Longfei and Yan, Shen and Liu, Xinhang and Peng, Sida and Wei, Yunchao and Zhou, Xiaowei and Kang, Bingyi},
  title     = {{SpatialTree}: How Spatial Intelligence Branches Out in {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16701--16711}
}
HyperST: Hierarchical Hyperbolic Learning for Spatial Transcriptomics Prediction: Chen Zhang,

Yilu An,

Ying Chen,

Hao Li,

Xitong Ling,

Lihao Liu,

Junjun He,

Yuxiang Lin,

Zihui Wang,

Rongshan Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chen and An, Yilu and Chen, Ying and Li, Hao and Ling, Xitong and Liu, Lihao and He, Junjun and Lin, Yuxiang and Wang, Zihui and Yu, Rongshan},
  title     = {{HyperST}: Hierarchical Hyperbolic Learning for Spatial Transcriptomics Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5730--5739}
}
Towards Robust Sequential Decomposition for Complex Image Editing: Zilai Zeng,

Mingdeng Cao,

Zijie Li,

Xiaochen Lian,

Yichun Shi,

Peihao Zhu,

Chen Sun,

Peng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Zilai and Cao, Mingdeng and Li, Zijie and Lian, Xiaochen and Shi, Yichun and Zhu, Peihao and Sun, Chen and Wang, Peng},
  title     = {Towards Robust Sequential Decomposition for Complex Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38101--38110}
}
Prototypical Action Reasoning Facilitated by Vision-Language Alignment for Egocentric Action Anticipation: Jiang Shao,

Xinbo Zhao,

Wenyin Tuo,

Xiaochun Zou; [pdf]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Jiang and Zhao, Xinbo and Tuo, Wenyin and Zou, Xiaochun},
  title     = {Prototypical Action Reasoning Facilitated by Vision-Language Alignment for Egocentric Action Anticipation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24000--24009}
}
PECCVAI: Overcoming the Brittleness of AI Image Watermarking Under Visual Paraphrasing Attacks: Shreyas Dixit,

Ashhar Aziz,

Shashwat Bajpai,

Vasu Sharma,

Aman Chadha,

Vinija Jain,

Amitava Das; [pdf] [supp]
[bibtex]
@InProceedings{Dixit_2026_CVPR,
  author    = {Dixit, Shreyas and Aziz, Ashhar and Bajpai, Shashwat and Sharma, Vasu and Chadha, Aman and Jain, Vinija and Das, Amitava},
  title     = {{PECCVAI}: Overcoming the Brittleness of {AI} Image Watermarking Under Visual Paraphrasing Attacks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24471--24480}
}
STAC: Plug-and-Play Spatio-Temporal Aware Cache Compression for Streaming 3D Reconstruction: Runze Wang,

Yuxuan Song,

Youcheng Cai,

Ligang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Runze and Song, Yuxuan and Cai, Youcheng and Liu, Ligang},
  title     = {{STAC}: Plug-and-Play Spatio-Temporal Aware Cache Compression for Streaming {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7567--7576}
}
D^3FER: Dual Channel and Dual Branch Network for Robust Facial Expression Recognition under Dual Challenges: Hui Tang,

Yifan He,

Zhong Jin; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Hui and He, Yifan and Jin, Zhong},
  title     = {{D$^3$FER}: Dual Channel and Dual Branch Network for Robust Facial Expression Recognition under Dual Challenges},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18085--18095}
}
GA-VLN: Geometry-Aware BEV Representation for Efficient Vision-Language Navigation: Jiahao Yang,

Zihan Wang,

Xiangyang Li,

Xing Zhu,

Yujun Shen,

Yinghao Xu,

Shuqiang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Jiahao and Wang, Zihan and Li, Xiangyang and Zhu, Xing and Shen, Yujun and Xu, Yinghao and Jiang, Shuqiang},
  title     = {{GA-VLN}: Geometry-Aware {BEV} Representation for Efficient Vision-Language Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40053--40063}
}
SAME: Sparse and Anchored Model Editing for Heterogeneous Incremental Learning under Limited Data: Zixuan Duan,

Zeyu Zhang,

Fengyuan Lu,

Shaofeng Zhang,

Wenbin Li,

Qi Fan,

Yang Gao; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2026_CVPR,
  author    = {Duan, Zixuan and Zhang, Zeyu and Lu, Fengyuan and Zhang, Shaofeng and Li, Wenbin and Fan, Qi and Gao, Yang},
  title     = {{SAME}: Sparse and Anchored Model Editing for Heterogeneous Incremental Learning under Limited Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25055--25065}
}
Context-Nav: Context-Driven Exploration and Viewpoint-Aware 3D Spatial Reasoning for Instance Navigation: Won Shik Jang,

Ue-Hwan Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2026_CVPR,
  author    = {Jang, Won Shik and Kim, Ue-Hwan},
  title     = {{Context-Nav}: Context-Driven Exploration and Viewpoint-Aware {3D} Spatial Reasoning for Instance Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9626--9636}
}
QVGGT: Post-Training Quantized Visual Geometry Grounded Transformer: Zhizhen Pan,

Hesong Wang,

Huan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Zhizhen and Wang, Hesong and Wang, Huan},
  title     = {{QVGGT}: Post-Training Quantized Visual Geometry Grounded Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7536--7545}
}
OptiMVMap: Offline Vectorized Map Construction via Optimal Multi-vehicle Perspectives: Zedong Dan,

Zijie Wang,

Wei Zhang,

Xiangru Lin,

Weiming Zhang,

Xiao Tan,

Jingdong Wang,

Liang Lin,

Guanbin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dan_2026_CVPR, author = {Dan, Zedong and Wang, Zijie and Zhang, Wei and Lin, Xiangru and Zhang, Weiming and Tan, Xiao and Wang, Jingdong and Lin, Liang and Li, Guanbin}, title = {{OptiMVMap}: Offline Vectorized Map Construction via Optimal Multi-vehicle Perspectives}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {18140--18149} }
Learning Explicit Continuous Motion Representation for Dynamic Gaussian Splatting from Monocular Videos: Xuankai Zhang,

Junjin Xiao,

Shangwei Huang,

Wei-shi Zheng,

Qing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xuankai and Xiao, Junjin and Huang, Shangwei and Zheng, Wei-shi and Zhang, Qing}, title = {Learning Explicit Continuous Motion Representation for Dynamic {Gaussian} Splatting from Monocular Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33291--33300} }
Hierarchical Attacks for Multi-Modal Multi-Agent Reasoning: Hao Zhou,

Tiru Wu,

Yan Jiang,

Wanqi Zhou,

Junxing Hu,

Ai Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR, author = {Zhou, Hao and Wu, Tiru and Jiang, Yan and Zhou, Wanqi and Hu, Junxing and Han, Ai}, title = {Hierarchical Attacks for Multi-Modal Multi-Agent Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42331--42340} }
MotionV2V: Editing Motion in a Video: Ryan Burgert,

Charles Herrmann,

Forrester Cole,

Michael S Ryoo,

Neal Wadhwa,

Andrey Voynov,

Nataniel Ruiz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Burgert_2026_CVPR, author = {Burgert, Ryan and Herrmann, Charles and Cole, Forrester and Ryoo, Michael S and Wadhwa, Neal and Voynov, Andrey and Ruiz, Nataniel}, title = {{MotionV2V}: Editing Motion in a Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35988--35997} }
Human-Centric Multi-Exposure Fusion: Benchmark and Bi-level Cognition Distillation Framework: Jingjie Shang,

Tengyu Ma,

Heng Zhang,

Jinyuan Liu,

Risheng Liu,

Yuan Wang,

Xiaochen Bo; [pdf] [supp]
[bibtex]
@InProceedings{Shang_2026_CVPR, author = {Shang, Jingjie and Ma, Tengyu and Zhang, Heng and Liu, Jinyuan and Liu, Risheng and Wang, Yuan and Bo, Xiaochen}, title = {Human-Centric Multi-Exposure Fusion: Benchmark and Bi-level Cognition Distillation Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {26572--26581} }
DC-Merge: Improving Model Merging with Directional Consistency: Han-Chen Zhang,

Zi-Hao Zhou,

Mao-Lin Luo,

Shimin Di,

Min-Ling Zhang,

Tong Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Han-Chen and Zhou, Zi-Hao and Luo, Mao-Lin and Di, Shimin and Zhang, Min-Ling and Wei, Tong}, title = {{DC-Merge}: Improving Model Merging with Directional Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {22248--22258} }
Random Wins All: Rethinking Grouping Strategies for Vision Tokens: Qihang Fan,

Yuang Ai,

Huaibo Huang,

Ran He; [pdf] [arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR, author = {Fan, Qihang and Ai, Yuang and Huang, Huaibo and He, Ran}, title = {Random Wins All: Rethinking Grouping Strategies for Vision Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {27357--27366} }
R-4B: Incentivizing General-Purpose Auto-Thinking in MLLMs via Bi-Mode Annealing and Reinforce Learning: Qi Yang,

Bolin Ni,

Shiming Xiang,

Houwen Peng; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Qi and Ni, Bolin and Xiang, Shiming and Peng, Houwen}, title = {{R-4B}: Incentivizing General-Purpose Auto-Thinking in {MLLMs} via Bi-Mode Annealing and Reinforce Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7891--7900} }
When Robots Obey the Patch: Universal Transferable Patch Attacks on Vision-Language-Action Models: Hui Lu,

Yi Yu,

Yiming Yang,

Chenyu Yi,

Qixin Zhang,

Bingquan Shen,

Alex C. Kot,

Xudong Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR, author = {Lu, Hui and Yu, Yi and Yang, Yiming and Yi, Chenyu and Zhang, Qixin and Shen, Bingquan and Kot, Alex C. and Jiang, Xudong}, title = {When Robots Obey the Patch: Universal Transferable Patch Attacks on Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {22867--22878} }
Life-IQA: Boosting Blind Image Quality Assessment through GCN-enhanced Layer Interaction and MoE-based Feature Decoupling: Long Tang,

Huiyu Duan,

Guoquan Zheng,

Jianbo Zhang,

Jie Hao,

Liang Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR, author = {Tang, Long and Duan, Huiyu and Zheng, Guoquan and Zhang, Jianbo and Hao, Jie and Yuan, Liang}, title = {{Life-IQA}: Boosting Blind Image Quality Assessment through {GCN}-enhanced Layer Interaction and {MoE}-based Feature Decoupling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29866--29876} }
Seeing through boxes: Non-Line-of-Sight 3D Reconstruction from Radar Signals: Jiachen Lu,

Hailan Shanbhag,

Haitham Al Hassanieh; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR, author = {Lu, Jiachen and Shanbhag, Hailan and Al Hassanieh, Haitham}, title = {Seeing through boxes: Non-Line-of-Sight {3D} Reconstruction from Radar Signals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1221--1230} }
Efficient Training for Human Video Generation with Entropy-Guided Prioritized Progressive Learning: Changlin Li,

Jiawei Zhang,

Shuhao Liu,

Sihao Lin,

Zeyi Shi,

Zhihui Li,

Xiaojun Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Changlin and Zhang, Jiawei and Liu, Shuhao and Lin, Sihao and Shi, Zeyi and Li, Zhihui and Chang, Xiaojun}, title = {Efficient Training for Human Video Generation with Entropy-Guided Prioritized Progressive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5967--5977} }
CoT-Edit: Let CoT Guide Instruction Video Editing: Sen Liang,

Fengbin Guan,

Youliang Zhang,

Xin Li,

Zhibo Chen; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2026_CVPR, author = {Liang, Sen and Guan, Fengbin and Zhang, Youliang and Li, Xin and Chen, Zhibo}, title = {{CoT-Edit}: Let {CoT} Guide Instruction Video Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37960--37970} }
Sparse Spectral LoRA: Routed Experts for Medical VLMs: Omid Nejatimanzari,

Hojat Asgariandehkordi,

Taha Koleilat,

Yiming Xiao,

Hassan Rivaz; [pdf] [supp]
[bibtex]
@InProceedings{Nejatimanzari_2026_CVPR, author = {Nejatimanzari, Omid and Asgariandehkordi, Hojat and Koleilat, Taha and Xiao, Yiming and Rivaz, Hassan}, title = {Sparse Spectral {LoRA}: Routed Experts for Medical {VLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35351--35362} }
MFEN: Multi-Frequency Expert Network for Visible-Infrared Person Re-ID: Xulin Li,

Yan Lu,

Bin Liu,

Qinhong Yang,

Qi Chu,

Tao Gong,

Nenghai Yu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Xulin and Lu, Yan and Liu, Bin and Yang, Qinhong and Chu, Qi and Gong, Tao and Yu, Nenghai}, title = {{MFEN}: Multi-Frequency Expert Network for Visible-Infrared Person {Re-ID}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {18471--18480} }
UniRain: Unified Image Deraining with RAG-based Dataset Distillation and Multi-objective Reweighted Optimization: Qianfeng Yang,

Qiyuan Guan,

Xiang Chen,

Jiyu Jin,

Guiyue Jin,

Jiangxin Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Qianfeng and Guan, Qiyuan and Chen, Xiang and Jin, Jiyu and Jin, Guiyue and Dong, Jiangxin}, title = {{UniRain}: Unified Image Deraining with {RAG}-based Dataset Distillation and Multi-objective Reweighted Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12428--12437} }
From Softmax to Dirichlet: Evidential Learning for Semi-supervised Semantic Segmentation: Huayu Mai,

Rui Sun,

Yujia Chen,

Wangkai Li,

Bingzhou Wang,

Aibing Li,

Zhangyu He,

Yuan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Mai_2026_CVPR, author = {Mai, Huayu and Sun, Rui and Chen, Yujia and Li, Wangkai and Wang, Bingzhou and Li, Aibing and He, Zhangyu and Wang, Yuan}, title = {From Softmax to {Dirichlet}: Evidential Learning for Semi-supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {27697--27707} }
FlowDirector: Training-Free Flow Steering for Precise Text-to-Video Editing: Guangzhao Li,

Yanming Yang,

Chenxi Song,

Xiaohong Liu,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Guangzhao and Yang, Yanming and Song, Chenxi and Liu, Xiaohong and Zhang, Chi}, title = {{FlowDirector}: Training-Free Flow Steering for Precise Text-to-Video Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7805--7815} }
ImageRAGTurbo: Towards One-step Text-to-Image Generation with Retrieval-Augmented Diffusion Models: Peijie Qiu,

Hariharan Ramshankar,

Arnau Ramisa,

Amit Kumar K C,

René Vidal,

Vamsi Salaka,

Rahul Bhagat; [pdf] [arXiv]
[bibtex]
@InProceedings{Qiu_2026_CVPR, author = {Qiu, Peijie and Ramshankar, Hariharan and Ramisa, Arnau and {K C}, Amit Kumar and Vidal, Ren{\'e} and Salaka, Vamsi and Bhagat, Rahul}, title = {{ImageRAGTurbo}: Towards One-step Text-to-Image Generation with Retrieval-Augmented Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {529--539} }
Principled Steering via Null-space Projection for Jailbreak Defense in Vision-Language Models: Xingyu Zhu,

Beier Zhu,

Shuo Wang,

Junfeng Fang,

Kesen Zhao,

Hanwang Zhang,

Xiangnan He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR, author = {Zhu, Xingyu and Zhu, Beier and Wang, Shuo and Fang, Junfeng and Zhao, Kesen and Zhang, Hanwang and He, Xiangnan}, title = {Principled Steering via Null-space Projection for Jailbreak Defense in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {22889--22899} }
DiT-IC: Aligned Diffusion Transformer for Efficient Image Compression: Junqi Shi,

Ming Lu,

Xingchen Li,

Anle Ke,

Ruiqi Zhang,

Zhan Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR, author = {Shi, Junqi and Lu, Ming and Li, Xingchen and Ke, Anle and Zhang, Ruiqi and Ma, Zhan}, title = {{DiT-IC}: Aligned Diffusion Transformer for Efficient Image Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {25719--25729} }
PointTPA: Dynamic Network Parameter Adaptation for 3D Scene Understanding: Siyuan Liu,

Chaoqun Zheng,

Xin Zhou,

Tianrui Feng,

Dingkang Liang,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Siyuan and Zheng, Chaoqun and Zhou, Xin and Feng, Tianrui and Liang, Dingkang and Bai, Xiang}, title = {{PointTPA}: Dynamic Network Parameter Adaptation for {3D} Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36571--36581} }
Can Natural Image Autoencoders Compactly Tokenize fMRI Volumes for Long-Range Dynamics Modeling?: Peter Yongho Kim,

Juhyeon Park,

Jungwoo Park,

Jubin Choi,

Jungwoo Seo,

Jiook Cha,

Taesup Moon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Peter Yongho and Park, Juhyeon and Park, Jungwoo and Choi, Jubin and Seo, Jungwoo and Cha, Jiook and Moon, Taesup}, title = {Can Natural Image Autoencoders Compactly Tokenize {fMRI} Volumes for Long-Range Dynamics Modeling?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35321--35330} }
Dexterous World Models: Byungjun Kim,

Taeksoo Kim,

Junyoung Lee,

Hanbyul Joo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Byungjun and Kim, Taeksoo and Lee, Junyoung and Joo, Hanbyul}, title = {Dexterous World Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29663--29673} }
You Only Erase Once: Erasing Anything without Bringing Unexpected Content: Yixing Zhu,

Qing Zhang,

Wenju Xu,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR, author = {Zhu, Yixing and Zhang, Qing and Xu, Wenju and Zheng, Wei-Shi}, title = {You Only Erase Once: Erasing Anything without Bringing Unexpected Content}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43197--43207} }
TokenGS: Decoupling 3D Gaussian Prediction from Pixels with Learnable Tokens: Jiawei Ren,

Michal Jan Tyszkiewicz,

Jiahui Huang,

Zan Gojcic; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR, author = {Ren, Jiawei and Tyszkiewicz, Michal Jan and Huang, Jiahui and Gojcic, Zan}, title = {{TokenGS}: Decoupling {3D} {Gaussian} Prediction from Pixels with Learnable Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {15365--15375} }
EditMGT: Unleashing Potentials of Masked Generative Transformers in Image Editing: Wei Chow,

Linfeng Li,

Lingdong Kong,

Zefeng Li,

Qi Xu,

Hang Song,

Tian Ye,

Xian Wang,

Jinbin Bai,

Shilin Xu,

Xiangtai Li,

Junting Pan,

Shaoteng Liu,

Ran Zhou,

Tianshu Yang,

Songhua Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chow_2026_CVPR, author = {Chow, Wei and Li, Linfeng and Kong, Lingdong and Li, Zefeng and Xu, Qi and Song, Hang and Ye, Tian and Wang, Xian and Bai, Jinbin and Xu, Shilin and Li, Xiangtai and Pan, Junting and Liu, Shaoteng and Zhou, Ran and Yang, Tianshu and Liu, Songhua}, title = {{EditMGT}: Unleashing Potentials of Masked Generative Transformers in Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38038--38048} }
World in a Frame: Understanding Culture Mixing as a New Challenge for Vision-Language Models: Eunsu Kim,

Junyeong Park,

Na Min An,

Junseong Kim,

Hitesh Laxmichand Patel,

Jiho Jin,

Julia Kruk,

Amit Agarwal,

Srikant Panda,

Fenal Ashokbhai Ilasariya,

Hyunjung Shim,

Alice Oh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Eunsu and Park, Junyeong and An, Na Min and Kim, Junseong and Patel, Hitesh Laxmichand and Jin, Jiho and Kruk, Julia and Agarwal, Amit and Panda, Srikant and Ilasariya, Fenal Ashokbhai and Shim, Hyunjung and Oh, Alice}, title = {World in a Frame: Understanding Culture Mixing as a New Challenge for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2477--2489} }
NuWa: Deriving Lightweight Class-Specific Vision Transformers for Edge Devices: Ziteng Wei,

Qiang He,

Bing Li,

Feifei Chen,

Hai Jin,

Yun Yang; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2026_CVPR, author = {Wei, Ziteng and He, Qiang and Li, Bing and Chen, Feifei and Jin, Hai and Yang, Yun}, title = {{NuWa}: Deriving Lightweight Class-Specific Vision Transformers for Edge Devices}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {156--166} }
SCAPO: Self-Supervised Category-Level Articulated Pose Estimation from a Single 3D Observation: Can Zhang,

Gim Hee Lee; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Can and Lee, Gim Hee}, title = {{SCAPO}: Self-Supervised Category-Level Articulated Pose Estimation from a Single {3D} Observation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13802--13811} }
CGHair: Compact Gaussian Hair Reconstruction with Card Clustering: Haimin Luo,

Srinjay Sarkar,

Albert Mosella-Montoro,

Francisco Vicente Carrasco,

Fernando De la Torre; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2026_CVPR, author = {Luo, Haimin and Sarkar, Srinjay and Mosella-Montoro, Albert and Carrasco, Francisco Vicente and {De la Torre}, Fernando}, title = {{CGHair}: Compact {Gaussian} Hair Reconstruction with Card Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {25225--25235} }
Frequency-domain Manipulation for Face Obfuscation: Jintae Kim,

Keunsoo Ko,

Chang-Su Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Jintae and Ko, Keunsoo and Kim, Chang-Su}, title = {Frequency-domain Manipulation for Face Obfuscation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10241--10250} }
E3AD: An Emotion-Aware Vision-Language-Action Model for Human-Centric End-to-End Autonomous Driving: Yihong Tang,

Haicheng Liao,

Tong Nie,

Junlin He,

Ao Qu,

Kehua Chen,

Wei Ma,

Zhenning Li,

Lijun Sun,

Chengzhong Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR, author = {Tang, Yihong and Liao, Haicheng and Nie, Tong and He, Junlin and Qu, Ao and Chen, Kehua and Ma, Wei and Li, Zhenning and Sun, Lijun and Xu, Chengzhong}, title = {{E3AD}: An Emotion-Aware Vision-Language-Action Model for Human-Centric End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10610--10620} }; Back