CVPR 2025 Open Access Repository

Papers

Back
Towards Source-Free Machine Unlearning: Sk Miraj Ahmed,

Umit Yigit Basaran,

Dripta S. Raychaudhuri,

Arindam Dutta,

Rohit Kundu,

Fahim Faisal Niloy,

Basak Guler,

Amit K. Roy-Chowdhury; [pdf] [supp]
[bibtex]
@InProceedings{Ahmed_2025_CVPR,
  author    = {Ahmed, Sk Miraj and Basaran, Umit Yigit and Raychaudhuri, Dripta S. and Dutta, Arindam and Kundu, Rohit and Niloy, Fahim Faisal and Guler, Basak and Roy-Chowdhury, Amit K.},
  title     = {Towards Source-Free Machine Unlearning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4948--4957},
}
Uni4D: Unifying Visual Foundation Models for 4D Modeling from a Single Video: David Yifan Yao,

Albert J. Zhai,

Shenlong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2025_CVPR,
  author    = {Yao, David Yifan and Zhai, Albert J. and Wang, Shenlong},
  title     = {{Uni4D}: Unifying Visual Foundation Models for {4D} Modeling from a Single Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1116--1126},
}
Hyperbolic Category Discovery: Yuanpei Liu,

Zhenqi He,

Kai Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yuanpei and He, Zhenqi and Han, Kai},
  title     = {Hyperbolic Category Discovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9891--9900},
}
The Language of Motion: Unifying Verbal and Non-verbal Language of 3D Human Motion: Changan Chen,

Juze Zhang,

Shrinidhi K. Lakshmikanth,

Yusu Fang,

Ruizhi Shao,

Gordon Wetzstein,

Li Fei-Fei,

Ehsan Adeli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Changan and Zhang, Juze and Lakshmikanth, Shrinidhi K. and Fang, Yusu and Shao, Ruizhi and Wetzstein, Gordon and Fei-Fei, Li and Adeli, Ehsan},
  title     = {The Language of Motion: Unifying Verbal and Non-verbal Language of {3D} Human Motion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6200--6211},
}
CALICO: Part-Focused Semantic Co-Segmentation with Large Vision-Language Models: Kiet A. Nguyen,

Adheesh Juvekar,

Tianjiao Yu,

Muntasir Wahed,

Ismini Lourentzou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Kiet A. and Juvekar, Adheesh and Yu, Tianjiao and Wahed, Muntasir and Lourentzou, Ismini},
  title     = {{CALICO}: Part-Focused Semantic Co-Segmentation with Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4550--4561},
}
Words or Vision: Do Vision-Language Models Have Blind Faith in Text?: Ailin Deng,

Tri Cao,

Zhirui Chen,

Bryan Hooi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Ailin and Cao, Tri and Chen, Zhirui and Hooi, Bryan},
  title     = {Words or Vision: Do Vision-Language Models Have Blind Faith in Text?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3867--3876},
}
Learning to Detect Objects from Multi-Agent LiDAR Scans without Manual Labels: Qiming Xia,

Wenkai Lin,

Haoen Xiang,

Xun Huang,

Siheng Chen,

Zhen Dong,

Cheng Wang,

Chenglu Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_CVPR,
  author    = {Xia, Qiming and Lin, Wenkai and Xiang, Haoen and Huang, Xun and Chen, Siheng and Dong, Zhen and Wang, Cheng and Wen, Chenglu},
  title     = {Learning to Detect Objects from Multi-Agent {LiDAR} Scans without Manual Labels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1418--1428},
}
DeepLA-Net: Very Deep Local Aggregation Networks for Point Cloud Analysis: Ziyin Zeng,

Mingyue Dong,

Jian Zhou,

Huan Qiu,

Zhen Dong,

Man Luo,

Bijun Li; [pdf] [supp]
[bibtex]
@InProceedings{Zeng_2025_CVPR,
  author    = {Zeng, Ziyin and Dong, Mingyue and Zhou, Jian and Qiu, Huan and Dong, Zhen and Luo, Man and Li, Bijun},
  title     = {{DeepLA-Net}: Very Deep Local Aggregation Networks for Point Cloud Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1330--1341},
}
Multi-Layer Visual Feature Fusion in Multimodal LLMs: Methods, Analysis, and Best Practices: Junyan Lin,

Haoran Chen,

Yue Fan,

Yingqi Fan,

Xin Jin,

Hui Su,

Jinlan Fu,

Xiaoyu Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Junyan and Chen, Haoran and Fan, Yue and Fan, Yingqi and Jin, Xin and Su, Hui and Fu, Jinlan and Shen, Xiaoyu},
  title     = {Multi-Layer Visual Feature Fusion in Multimodal {LLMs}: Methods, Analysis, and Best Practices},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4156--4166},
}
APHQ-ViT: Post-Training Quantization with Average Perturbation Hessian Based Reconstruction for Vision Transformers: Zhuguanyu Wu,

Jiayi Zhang,

Jiaxin Chen,

Jinyang Guo,

Di Huang,

Yunhong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Zhuguanyu and Zhang, Jiayi and Chen, Jiaxin and Guo, Jinyang and Huang, Di and Wang, Yunhong},
  title     = {{APHQ-ViT}: Post-Training Quantization with Average Perturbation {Hessian} Based Reconstruction for Vision Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9686--9695},
}
AdaptCMVC: Robust Adaption to Incremental Views in Continual Multi-view Clustering: Jing Wang,

Songhe Feng,

Kristoffer Knutsen Wickstrøm,

Michael C. Kampffmeyer; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jing and Feng, Songhe and Wickstr{\o}m, Kristoffer Knutsen and Kampffmeyer, Michael C.},
  title     = {{AdaptCMVC}: Robust Adaption to Incremental Views in Continual Multi-view Clustering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10285--10294},
}
UA-Pose: Uncertainty-Aware 6D Object Pose Estimation and Online Object Completion with Partial References: Ming-Feng Li,

Xin Yang,

Fu-En Wang,

Hritam Basak,

Yuyin Sun,

Shreekant Gayaka,

Min Sun,

Cheng-Hao Kuo; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Ming-Feng and Yang, Xin and Wang, Fu-En and Basak, Hritam and Sun, Yuyin and Gayaka, Shreekant and Sun, Min and Kuo, Cheng-Hao},
  title     = {{UA-Pose}: Uncertainty-Aware {6D} Object Pose Estimation and Online Object Completion with Partial References},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1180--1189},
}
Binarized Mamba-Transformer for Lightweight Quad Bayer HybridEVS Demosaicing: Shiyang Zhou,

Haijin Zeng,

Yunfan Lu,

Tong Shao,

Ke Tang,

Yongyong Chen,

Jie Liu,

Jingyong Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Shiyang and Zeng, Haijin and Lu, Yunfan and Shao, Tong and Tang, Ke and Chen, Yongyong and Liu, Jie and Su, Jingyong},
  title     = {Binarized {Mamba}-Transformer for Lightweight Quad {Bayer} {HybridEVS} Demosaicing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8817--8827},
}
Interpretable Image Classification via Non-parametric Part Prototype Learning: Zhijie Zhu,

Lei Fan,

Maurice Pagnucco,

Yang Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Zhijie and Fan, Lei and Pagnucco, Maurice and Song, Yang},
  title     = {Interpretable Image Classification via Non-parametric Part Prototype Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9762--9771},
}
DAGSM: Disentangled Avatar Generation with GS-enhanced Mesh: Jingyu Zhuang,

Di Kang,

Linchao Bao,

Liang Lin,

Guanbin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuang_2025_CVPR,
  author    = {Zhuang, Jingyu and Kang, Di and Bao, Linchao and Lin, Liang and Li, Guanbin},
  title     = {{DAGSM}: Disentangled Avatar Generation with {GS}-enhanced Mesh},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {292--303},
}
Estimating Body and Hand Motion in an Ego-sensed World: Brent Yi,

Vickie Ye,

Maya Zheng,

Yunqi Li,

Lea Müller,

Georgios Pavlakos,

Yi Ma,

Jitendra Malik,

Angjoo Kanazawa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yi_2025_CVPR,
  author    = {Yi, Brent and Ye, Vickie and Zheng, Maya and Li, Yunqi and M{\"u}ller, Lea and Pavlakos, Georgios and Ma, Yi and Malik, Jitendra and Kanazawa, Angjoo},
  title     = {Estimating Body and Hand Motion in an Ego-sensed World},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7072--7084},
}
Evaluating Vision-Language Models as Evaluators in Path Planning: Mohamed Aghzal,

Xiang Yue,

Erion Plaku,

Ziyu Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Aghzal_2025_CVPR,
  author    = {Aghzal, Mohamed and Yue, Xiang and Plaku, Erion and Yao, Ziyu},
  title     = {Evaluating Vision-Language Models as Evaluators in Path Planning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6886--6897},
}
Free on the Fly: Enhancing Flexibility in Test-Time Adaptation with Online EM: Qiyuan Dai,

Sibei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2025_CVPR,
  author    = {Dai, Qiyuan and Yang, Sibei},
  title     = {Free on the Fly: Enhancing Flexibility in Test-Time Adaptation with Online {EM}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9538--9548},
}
SGC-Net: Stratified Granular Comparison Network for Open-Vocabulary HOI Detection: Xin Lin,

Chong Shi,

Zuopeng Yang,

Haojin Tang,

Zhili Zhou; [pdf]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Xin and Shi, Chong and Yang, Zuopeng and Tang, Haojin and Zhou, Zhili},
  title     = {{SGC-Net}: Stratified Granular Comparison Network for Open-Vocabulary {HOI} Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4539--4549},
}
Galaxy Walker: Geometry-aware VLMs For Galaxy-scale Understanding: Tianyu Chen,

Xingcheng Fu,

Yisen Gao,

Haodong Qian,

Yuecen Wei,

Kun Yan,

Haoyi Zhou,

Jianxin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Tianyu and Fu, Xingcheng and Gao, Yisen and Qian, Haodong and Wei, Yuecen and Yan, Kun and Zhou, Haoyi and Li, Jianxin},
  title     = {{Galaxy Walker}: Geometry-aware {VLMs} For Galaxy-scale Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4112--4121},
}
SnowMaster: Comprehensive Real-world Image Desnowing via MLLM with Multi-Model Feedback Optimization: Jianyu Lai,

Sixiang Chen,

Yunlong Lin,

Tian Ye,

Yun Liu,

Song Fei,

Zhaohu Xing,

Hongtao Wu,

Weiming Wang,

Lei Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Jianyu and Chen, Sixiang and Lin, Yunlong and Ye, Tian and Liu, Yun and Fei, Song and Xing, Zhaohu and Wu, Hongtao and Wang, Weiming and Zhu, Lei},
  title     = {{SnowMaster}: Comprehensive Real-world Image Desnowing via {MLLM} with Multi-Model Feedback Optimization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4302--4312},
}
Exploring Timeline Control for Facial Motion Generation: Yifeng Ma,

Jinwei Qi,

Chaonan Ji,

Peng Zhang,

Bang Zhang,

Zhidong Deng,

Liefeng Bo; [pdf] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Yifeng and Qi, Jinwei and Ji, Chaonan and Zhang, Peng and Zhang, Bang and Deng, Zhidong and Bo, Liefeng},
  title     = {Exploring Timeline Control for Facial Motion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1940--1950},
}
GAF: Gaussian Avatar Reconstruction from Monocular Videos via Multi-view Diffusion: Jiapeng Tang,

Davide Davoli,

Tobias Kirschstein,

Liam Schoneveld,

Matthias Nießner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Jiapeng and Davoli, Davide and Kirschstein, Tobias and Schoneveld, Liam and Nie{\ss}ner, Matthias},
  title     = {{GAF}: {Gaussian} Avatar Reconstruction from Monocular Videos via Multi-view Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5546--5558},
}
AI-Face: A Million-Scale Demographically Annotated AI-Generated Face Dataset and Fairness Benchmark: Li Lin,

Santosh Santosh,

Mingyang Wu,

Xin Wang,

Shu Hu; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Li and Santosh, Santosh and Wu, Mingyang and Wang, Xin and Hu, Shu},
  title     = {{AI-Face}: A Million-Scale Demographically Annotated {AI}-Generated Face Dataset and Fairness Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3503--3515},
}
Enhancing Video-LLM Reasoning via Agent-of-Thoughts Distillation: Yudi Shi,

Shangzhe Di,

Qirui Chen,

Weidi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Yudi and Di, Shangzhe and Chen, Qirui and Xie, Weidi},
  title     = {Enhancing {Video-LLM} Reasoning via Agent-of-Thoughts Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8523--8533},
}
De^2Gaze: Deformable and Decoupled Representation Learning for 3D Gaze Estimation: Yunfeng Xiao,

Xiaowei Bai,

Baojun Chen,

Hao Su,

Hao He,

Liang Xie,

Erwei Yin; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Yunfeng and Bai, Xiaowei and Chen, Baojun and Su, Hao and He, Hao and Xie, Liang and Yin, Erwei},
  title     = {{De{\textasciicircum}2Gaze}: Deformable and Decoupled Representation Learning for {3D} Gaze Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3091--3100},
}
ReCapture: Generative Video Camera Controls for User-Provided Videos using Masked Video Fine-Tuning: David Junhao Zhang,

Roni Paiss,

Shiran Zada,

Nikhil Karnad,

David E. Jacobs,

Yael Pritch,

Inbar Mosseri,

Mike Zheng Shou,

Neal Wadhwa,

Nataniel Ruiz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, David Junhao and Paiss, Roni and Zada, Shiran and Karnad, Nikhil and Jacobs, David E. and Pritch, Yael and Mosseri, Inbar and Shou, Mike Zheng and Wadhwa, Neal and Ruiz, Nataniel},
  title     = {{ReCapture}: Generative Video Camera Controls for User-Provided Videos using Masked Video Fine-Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2050--2062},
}
Self-Expansion of Pre-trained Models with Mixture of Adapters for Continual Learning: Huiyi Wang,

Haodong Lu,

Lina Yao,

Dong Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Huiyi and Lu, Haodong and Yao, Lina and Gong, Dong},
  title     = {Self-Expansion of Pre-trained Models with Mixture of Adapters for Continual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10087--10098},
}
Brain-Inspired Spiking Neural Networks for Energy-Efficient Object Detection: Ziqi Li,

Tao Gao,

Yisheng An,

Ting Chen,

Jing Zhang,

Yuanbo Wen,

Mengkun Liu,

Qianxi Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Ziqi and Gao, Tao and An, Yisheng and Chen, Ting and Zhang, Jing and Wen, Yuanbo and Liu, Mengkun and Zhang, Qianxi},
  title     = {Brain-Inspired Spiking Neural Networks for Energy-Efficient Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3552--3562},
}
Medusa: A Multi-Scale High-order Contrastive Dual-Diffusion Approach for Multi-View Clustering: Liang Chen,

Zhe Xue,

Yawen Li,

Meiyu Liang,

Yan Wang,

Anton van den Hengel,

Yuankai Qi; [pdf]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Liang and Xue, Zhe and Li, Yawen and Liang, Meiyu and Wang, Yan and van den Hengel, Anton and Qi, Yuankai},
  title     = {{Medusa}: A Multi-Scale High-order Contrastive Dual-Diffusion Approach for Multi-View Clustering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10295--10304},
}
MambaOut: Do We Really Need Mamba for Vision?: Weihao Yu,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Weihao and Wang, Xinchao},
  title     = {{MambaOut}: Do We Really Need {Mamba} for Vision?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4484--4496},
}
Seurat: From Moving Points to Depth: Seokju Cho,

Jiahui Huang,

Seungryong Kim,

Joon-Young Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cho_2025_CVPR,
  author    = {Cho, Seokju and Huang, Jiahui and Kim, Seungryong and Lee, Joon-Young},
  title     = {{Seurat}: From Moving Points to Depth},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7211--7221},
}
Img-Diff: Contrastive Data Synthesis for Multimodal Large Language Models: Qirui Jiao,

Daoyuan Chen,

Yilun Huang,

Bolin Ding,

Yaliang Li,

Ying Shen; [pdf] [supp]
[bibtex]
@InProceedings{Jiao_2025_CVPR,
  author    = {Jiao, Qirui and Chen, Daoyuan and Huang, Yilun and Ding, Bolin and Li, Yaliang and Shen, Ying},
  title     = {{Img-Diff}: Contrastive Data Synthesis for Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9296--9307},
}
The Devil is in the Prompts: Retrieval-Augmented Prompt Optimization for Text-to-Video Generation: Bingjie Gao,

Xinyu Gao,

Xiaoxue Wu,

Yujie Zhou,

Yu Qiao,

Li Niu,

Xinyuan Chen,

Yaohui Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Bingjie and Gao, Xinyu and Wu, Xiaoxue and Zhou, Yujie and Qiao, Yu and Niu, Li and Chen, Xinyuan and Wang, Yaohui},
  title     = {The Devil is in the Prompts: Retrieval-Augmented Prompt Optimization for Text-to-Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3173--3183},
}
DnLUT: Ultra-Efficient Color Image Denoising via Channel-Aware Lookup Tables: Sidi Yang,

Binxiao Huang,

Yulun Zhang,

Dahai Yu,

Yujiu Yang,

Ngai Wong; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Sidi and Huang, Binxiao and Zhang, Yulun and Yu, Dahai and Yang, Yujiu and Wong, Ngai},
  title     = {{DnLUT}: Ultra-Efficient Color Image Denoising via Channel-Aware Lookup Tables},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7582--7591},
}
BiM-VFI: Bidirectional Motion Field-Guided Frame Interpolation for Video with Non-uniform Motions: Wonyong Seo,

Jihyong Oh,

Munchurl Kim; [pdf] [supp]
[bibtex]
@InProceedings{Seo_2025_CVPR,
  author    = {Seo, Wonyong and Oh, Jihyong and Kim, Munchurl},
  title     = {{BiM-VFI}: Bidirectional Motion Field-Guided Frame Interpolation for Video with Non-uniform Motions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7244--7253},
}
SATA: Spatial Autocorrelation Token Analysis for Enhancing the Robustness of Vision Transformers: Nick Nikzad,

Yi Liao,

Yongsheng Gao,

Jun Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nikzad_2025_CVPR,
  author    = {Nikzad, Nick and Liao, Yi and Gao, Yongsheng and Zhou, Jun},
  title     = {{SATA}: Spatial Autocorrelation Token Analysis for Enhancing the Robustness of Vision Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9730--9739},
}
Nested Diffusion Models Using Hierarchical Latent Priors: Xiao Zhang,

Ruoxi Jiang,

Rebecca Willett,

Michael Maire; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xiao and Jiang, Ruoxi and Willett, Rebecca and Maire, Michael},
  title     = {Nested Diffusion Models Using Hierarchical Latent Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2502--2512},
}
A Theory of Learning Unified Model via Knowledge Integration from Label Space Varying Domains: Dexuan Zhang,

Thomas Westfechtel,

Tatsuya Harada; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Dexuan and Westfechtel, Thomas and Harada, Tatsuya},
  title     = {A Theory of Learning Unified Model via Knowledge Integration from Label Space Varying Domains},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10142--10152},
}
HiLoTs: High-Low Temporal Sensitive Representation Learning for Semi-Supervised LiDAR Segmentation in Autonomous Driving: R.D. Lin,

Pengcheng Weng,

Yinqiao Wang,

Han Ding,

Jinsong Han,

Fei Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, R. D. and Weng, Pengcheng and Wang, Yinqiao and Ding, Han and Han, Jinsong and Wang, Fei},
  title     = {{HiLoTs}: High-Low Temporal Sensitive Representation Learning for Semi-Supervised {LiDAR} Segmentation in Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1429--1438},
}
DKDM: Data-Free Knowledge Distillation for Diffusion Models with Any Architecture: Qianlong Xiang,

Miao Zhang,

Yuzhang Shang,

Jianlong Wu,

Yan Yan,

Liqiang Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2025_CVPR,
  author    = {Xiang, Qianlong and Zhang, Miao and Shang, Yuzhang and Wu, Jianlong and Yan, Yan and Nie, Liqiang},
  title     = {{DKDM}: Data-Free Knowledge Distillation for Diffusion Models with Any Architecture},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2955--2965},
}
SymDPO: Boosting In-Context Learning of Large Multimodal Models with Symbol Demonstration Direct Preference Optimization: Hongrui Jia,

Chaoya Jiang,

Haiyang Xu,

Wei Ye,

Mengfan Dong,

Ming Yan,

Ji Zhang,

Fei Huang,

Shikun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2025_CVPR,
  author    = {Jia, Hongrui and Jiang, Chaoya and Xu, Haiyang and Ye, Wei and Dong, Mengfan and Yan, Ming and Zhang, Ji and Huang, Fei and Zhang, Shikun},
  title     = {{SymDPO}: Boosting In-Context Learning of Large Multimodal Models with Symbol Demonstration Direct Preference Optimization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9361--9371},
}
Debiasing Multimodal Large Language Models via Noise-Aware Preference Optimization: Zefeng Zhang,

Hengzhu Tang,

Jiawei Sheng,

Zhenyu Zhang,

Yiming Ren,

Zhenyang Li,

Dawei Yin,

Duohe Ma,

Tingwen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zefeng and Tang, Hengzhu and Sheng, Jiawei and Zhang, Zhenyu and Ren, Yiming and Li, Zhenyang and Yin, Dawei and Ma, Duohe and Liu, Tingwen},
  title     = {Debiasing Multimodal Large Language Models via Noise-Aware Preference Optimization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9423--9433},
}
Feat2GS: Probing Visual Foundation Models with Gaussian Splatting: Yue Chen,

Xingyu Chen,

Anpei Chen,

Gerard Pons-Moll,

Yuliang Xiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Yue and Chen, Xingyu and Chen, Anpei and Pons-Moll, Gerard and Xiu, Yuliang},
  title     = {{Feat2GS}: Probing Visual Foundation Models with {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6348--6361},
}
LSNet: See Large, Focus Small: Ao Wang,

Hui Chen,

Zijia Lin,

Jungong Han,

Guiguang Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Ao and Chen, Hui and Lin, Zijia and Han, Jungong and Ding, Guiguang},
  title     = {{LSNet}: See Large, Focus Small},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9718--9729},
}
DynamicScaler: Seamless and Scalable Video Generation for Panoramic Scenes: Jinxiu Liu,

Shaoheng Lin,

Yinxiao Li,

Ming-Hsuan Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jinxiu and Lin, Shaoheng and Li, Yinxiao and Yang, Ming-Hsuan},
  title     = {{DynamicScaler}: Seamless and Scalable Video Generation for Panoramic Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6144--6153},
}
DocLayLLM: An Efficient Multi-modal Extension of Large Language Models for Text-rich Document Understanding: Wenhui Liao,

Jiapeng Wang,

Hongliang Li,

Chengyu Wang,

Jun Huang,

Lianwen Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_CVPR,
  author    = {Liao, Wenhui and Wang, Jiapeng and Li, Hongliang and Wang, Chengyu and Huang, Jun and Jin, Lianwen},
  title     = {{DocLayLLM}: An Efficient Multi-modal Extension of Large Language Models for Text-rich Document Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4038--4049},
}
EDEN: Enhanced Diffusion for High-quality Large-motion Video Frame Interpolation: Zihao Zhang,

Haoran Chen,

Haoyu Zhao,

Guansong Lu,

Yanwei Fu,

Hang Xu,

Zuxuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zihao and Chen, Haoran and Zhao, Haoyu and Lu, Guansong and Fu, Yanwei and Xu, Hang and Wu, Zuxuan},
  title     = {{EDEN}: Enhanced Diffusion for High-quality Large-motion Video Frame Interpolation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2105--2115},
}
Handling Spatial-Temporal Data Heterogeneity for Federated Continual Learning via Tail Anchor: Hao Yu,

Xin Yang,

Le Zhang,

Hanlin Gu,

Tianrui Li,

Lixin Fan,

Qiang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Hao and Yang, Xin and Zhang, Le and Gu, Hanlin and Li, Tianrui and Fan, Lixin and Yang, Qiang},
  title     = {Handling Spatial-Temporal Data Heterogeneity for Federated Continual Learning via Tail Anchor},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4874--4883},
}
DeSiRe-GS: 4D Street Gaussians for Static-Dynamic Decomposition and Surface Reconstruction for Urban Driving Scenes: Chensheng Peng,

Chengwei Zhang,

Yixiao Wang,

Chenfeng Xu,

Yichen Xie,

Wenzhao Zheng,

Kurt Keutzer,

Masayoshi Tomizuka,

Wei Zhan; [pdf] [supp]
[bibtex]
@InProceedings{Peng_2025_CVPR,
  author    = {Peng, Chensheng and Zhang, Chengwei and Wang, Yixiao and Xu, Chenfeng and Xie, Yichen and Zheng, Wenzhao and Keutzer, Kurt and Tomizuka, Masayoshi and Zhan, Wei},
  title     = {{DeSiRe-GS}: {4D} Street {Gaussians} for Static-Dynamic Decomposition and Surface Reconstruction for Urban Driving Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6782--6791},
}
REWIND: Real-Time Egocentric Whole-Body Motion Diffusion with Exemplar-Based Identity Conditioning: Jihyun Lee,

Weipeng Xu,

Alexander Richard,

Shih-En Wei,

Shunsuke Saito,

Shaojie Bai,

Te-Li Wang,

Minhyuk Sung,

Tae-Kyun Kim,

Jason Saragih; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Jihyun and Xu, Weipeng and Richard, Alexander and Wei, Shih-En and Saito, Shunsuke and Bai, Shaojie and Wang, Te-Li and Sung, Minhyuk and Kim, Tae-Kyun and Saragih, Jason},
  title     = {{REWIND}: Real-Time Egocentric Whole-Body Motion Diffusion with Exemplar-Based Identity Conditioning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7095--7104},
}
DoraCycle: Domain-Oriented Adaptation of Unified Generative Model in Multimodal Cycles: Rui Zhao,

Weijia Mao,

Mike Zheng Shou; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Rui and Mao, Weijia and Shou, Mike Zheng},
  title     = {{DoraCycle}: Domain-Oriented Adaptation of Unified Generative Model in Multimodal Cycles},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2835--2846},
}
Gaussian Splashing: Unified Particles for Versatile Motion Synthesis and Rendering: Yutao Feng,

Xiang Feng,

Yintong Shang,

Ying Jiang,

Chang Yu,

Zeshun Zong,

Tianjia Shao,

Hongzhi Wu,

Kun Zhou,

Chenfanfu Jiang,

Yin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Yutao and Feng, Xiang and Shang, Yintong and Jiang, Ying and Yu, Chang and Zong, Zeshun and Shao, Tianjia and Wu, Hongzhi and Zhou, Kun and Jiang, Chenfanfu and Yang, Yin},
  title     = {{Gaussian} Splashing: Unified Particles for Versatile Motion Synthesis and Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {518--529},
}
Improve Representation for Imbalanced Regression through Geometric Constraints: Zijian Dong,

Yilei Wu,

Chongyao Chen,

Yingtian Zou,

Yichi Zhang,

Juan Helen Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Zijian and Wu, Yilei and Chen, Chongyao and Zou, Yingtian and Zhang, Yichi and Zhou, Juan Helen},
  title     = {Improve Representation for Imbalanced Regression through Geometric Constraints},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5082--5091},
}
PartRM: Modeling Part-Level Dynamics with Large Cross-State Reconstruction Model: Mingju Gao,

Yike Pan,

Huan-ang Gao,

Zongzheng Zhang,

Wenyi Li,

Hao Dong,

Hao Tang,

Li Yi,

Hao Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Mingju and Pan, Yike and Gao, Huan-ang and Zhang, Zongzheng and Li, Wenyi and Dong, Hao and Tang, Hao and Yi, Li and Zhao, Hao},
  title     = {{PartRM}: Modeling Part-Level Dynamics with Large Cross-State Reconstruction Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7004--7014},
}
DiffFNO: Diffusion Fourier Neural Operator: Xiaoyi Liu,

Hao Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xiaoyi and Tang, Hao},
  title     = {{DiffFNO}: Diffusion {Fourier} Neural Operator},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {150--160},
}
Zero-Shot Styled Text Image Generation, but Make It Autoregressive: Vittorio Pippi,

Fabio Quattrini,

Silvia Cascianelli,

Alessio Tonioni,

Rita Cucchiara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pippi_2025_CVPR,
  author    = {Pippi, Vittorio and Quattrini, Fabio and Cascianelli, Silvia and Tonioni, Alessio and Cucchiara, Rita},
  title     = {Zero-Shot Styled Text Image Generation, but Make It Autoregressive},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7910--7919}
}
Leveraging Perturbation Robustness to Enhance Out-of-Distribution Detection: Wenxi Chen,

Raymond A. Yeh,

Shaoshuai Mou,

Yan Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Wenxi and Yeh, Raymond A. and Mou, Shaoshuai and Gu, Yan},
  title     = {Leveraging Perturbation Robustness to Enhance Out-of-Distribution Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4724--4733}
}
SALAD: Skeleton-aware Latent Diffusion for Text-driven Motion Generation and Editing: Seokhyeon Hong,

Chaelin Kim,

Serin Yoon,

Junghyun Nam,

Sihun Cha,

Junyong Noh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2025_CVPR,
  author    = {Hong, Seokhyeon and Kim, Chaelin and Yoon, Serin and Nam, Junghyun and Cha, Sihun and Noh, Junyong},
  title     = {{SALAD}: Skeleton-aware Latent Diffusion for Text-driven Motion Generation and Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7158--7168}
}
LookingGlass: Generative Anamorphoses via Laplacian Pyramid Warping: Pascal Chang,

Sergio Sancho,

Jingwei Tang,

Markus Gross,

Vinicius Azevedo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chang_2025_CVPR,
  author    = {Chang, Pascal and Sancho, Sergio and Tang, Jingwei and Gross, Markus and Azevedo, Vinicius},
  title     = {{LookingGlass}: Generative Anamorphoses via {Laplacian} Pyramid Warping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24--33}
}
ShowMak3r: Compositional TV Show Reconstruction: Sangmin Kim,

Seunguk Do,

Jaesik Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Sangmin and Do, Seunguk and Park, Jaesik},
  title     = {{ShowMak3r}: Compositional {TV} Show Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {864--874}
}
CADRef: Robust Out-of-Distribution Detection via Class-Aware Decoupled Relative Feature Leveraging: Zhiwei Ling,

Yachen Chang,

Hailiang Zhao,

Xinkui Zhao,

Kingsum Chow,

Shuiguang Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ling_2025_CVPR,
  author    = {Ling, Zhiwei and Chang, Yachen and Zhao, Hailiang and Zhao, Xinkui and Chow, Kingsum and Deng, Shuiguang},
  title     = {{CADRef}: Robust Out-of-Distribution Detection via Class-Aware Decoupled Relative Feature Leveraging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4968--4977}
}
VideoDirector: Precise Video Editing via Text-to-Video Models: Yukun Wang,

Longguang Wang,

Zhiyuan Ma,

Qibin Hu,

Kai Xu,

Yulan Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yukun and Wang, Longguang and Ma, Zhiyuan and Hu, Qibin and Xu, Kai and Guo, Yulan},
  title     = {{VideoDirector}: Precise Video Editing via Text-to-Video Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2589--2598}
}
VISTA: Enhancing Long-Duration and High-Resolution Video Understanding by Video Spatiotemporal Augmentation: Weiming Ren,

Huan Yang,

Jie Min,

Cong Wei,

Wenhu Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Weiming and Yang, Huan and Min, Jie and Wei, Cong and Chen, Wenhu},
  title     = {{VISTA}: Enhancing Long-Duration and High-Resolution Video Understanding by Video Spatiotemporal Augmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3804--3814}
}
GA3CE: Unconstrained 3D Gaze Estimation with Gaze-Aware 3D Context Encoding: Yuki Kawana,

Shintaro Shiba,

Quan Kong,

Norimasa Kobori; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kawana_2025_CVPR,
  author    = {Kawana, Yuki and Shiba, Shintaro and Kong, Quan and Kobori, Norimasa},
  title     = {{GA3CE}: Unconstrained {3D} Gaze Estimation with Gaze-Aware {3D} Context Encoding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3081--3090}
}
RigGS: Rigging of 3D Gaussians for Modeling Articulated Objects in Videos: Yuxin Yao,

Zhi Deng,

Junhui Hou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2025_CVPR,
  author    = {Yao, Yuxin and Deng, Zhi and Hou, Junhui},
  title     = {{RigGS}: Rigging of {3D} {Gaussians} for Modeling Articulated Objects in Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5592--5601}
}
Noise Modeling in One Hour: Minimizing Preparation Efforts for Self-supervised Low-Light RAW Image Denoising: Feiran Li,

Haiyang Jiang,

Daisuke Iso; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Feiran and Jiang, Haiyang and Iso, Daisuke},
  title     = {Noise Modeling in One Hour: Minimizing Preparation Efforts for Self-supervised Low-Light {RAW} Image Denoising},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5699--5708}
}
High Dynamic Range Video Compression: A Large-Scale Benchmark Dataset and A Learned Bit-depth Scalable Compression Algorithm: Zhaoyi Tian,

Feifeng Wang,

Shiwei Wang,

Zihao Zhou,

Yao Zhu,

Liquan Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Zhaoyi and Wang, Feifeng and Wang, Shiwei and Zhou, Zihao and Zhu, Yao and Shen, Liquan},
  title     = {High Dynamic Range Video Compression: A Large-Scale Benchmark Dataset and A Learned Bit-depth Scalable Compression Algorithm},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7320--7330}
}
DivPrune: Diversity-based Visual Token Pruning for Large Multimodal Models: Saeed Ranjbar Alvar,

Gursimran Singh,

Mohammad Akbari,

Yong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alvar_2025_CVPR,
  author    = {Alvar, Saeed Ranjbar and Singh, Gursimran and Akbari, Mohammad and Zhang, Yong},
  title     = {{DivPrune}: Diversity-based Visual Token Pruning for Large Multimodal Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9392--9401}
}
3D-AVS: LiDAR-based 3D Auto-Vocabulary Segmentation: Weijie Wei,

Osman Ülger,

Fatemeh Karimi Nejadasl,

Theo Gevers,

Martin R. Oswald; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Weijie and {\"U}lger, Osman and Nejadasl, Fatemeh Karimi and Gevers, Theo and Oswald, Martin R.},
  title     = {{3D-AVS}: {LiDAR}-based {3D} Auto-Vocabulary Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8910--8920}
}
MEGA: Masked Generative Autoencoder for Human Mesh Recovery: Guénolé Fiche,

Simon Leglaive,

Xavier Alameda-Pineda,

Francesc Moreno-Noguer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fiche_2025_CVPR,
  author    = {Fiche, Gu{\'e}nol{\'e} and Leglaive, Simon and Alameda-Pineda, Xavier and Moreno-Noguer, Francesc},
  title     = {{MEGA}: Masked Generative Autoencoder for Human Mesh Recovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5366--5378}
}
Disentangling Safe and Unsafe Image Corruptions via Anisotropy and Locality: Ramchandran Muthukumar,

Ambar Pal,

Jeremias Sulam,

Rene Vidal; [pdf] [supp]
[bibtex]
@InProceedings{Muthukumar_2025_CVPR,
  author    = {Muthukumar, Ramchandran and Pal, Ambar and Sulam, Jeremias and Vidal, Rene},
  title     = {Disentangling Safe and Unsafe Image Corruptions via Anisotropy and Locality},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9954--9963}
}
Prometheus: 3D-Aware Latent Diffusion Models for Feed-Forward Text-to-3D Scene Generation: Yuanbo Yang,

Jiahao Shao,

Xinyang Li,

Yujun Shen,

Andreas Geiger,

Yiyi Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Yuanbo and Shao, Jiahao and Li, Xinyang and Shen, Yujun and Geiger, Andreas and Liao, Yiyi},
  title     = {Prometheus: {3D}-Aware Latent Diffusion Models for Feed-Forward Text-to-{3D} Scene Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2857--2869}
}
SphereUFormer: A U-Shaped Transformer for Spherical 360 Perception: Yaniv Benny,

Lior Wolf; [pdf] [supp]
[bibtex]
@InProceedings{Benny_2025_CVPR,
  author    = {Benny, Yaniv and Wolf, Lior},
  title     = {{SphereUFormer}: A {U}-Shaped Transformer for Spherical 360 Perception},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {940--950}
}
Beyond Clean Training Data: A Versatile and Model-Agnostic Framework for Out-of-Distribution Detection with Contaminated Training Data: Yuchuan Li,

Jae-Mo Kang,

Il-Min Kim; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yuchuan and Kang, Jae-Mo and Kim, Il-Min},
  title     = {Beyond Clean Training Data: A Versatile and Model-Agnostic Framework for Out-of-Distribution Detection with Contaminated Training Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10183--10192}
}
FreeUV: Ground-Truth-Free Realistic Facial UV Texture Recovery via Cross-Assembly Inference Strategy: Xingchao Yang,

Takafumi Taketomi,

Yuki Endo,

Yoshihiro Kanamori; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Xingchao and Taketomi, Takafumi and Endo, Yuki and Kanamori, Yoshihiro},
  title     = {{FreeUV}: Ground-Truth-Free Realistic Facial {UV} Texture Recovery via Cross-Assembly Inference Strategy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {326--337}
}
HarmonySet: A Comprehensive Dataset for Understanding Video-Music Semantic Alignment and Temporal Synchronization: Zitang Zhou,

Ke Mei,

Yu Lu,

Tianyi Wang,

Fengyun Rao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Zitang and Mei, Ke and Lu, Yu and Wang, Tianyi and Rao, Fengyun},
  title     = {{HarmonySet}: A Comprehensive Dataset for Understanding Video-Music Semantic Alignment and Temporal Synchronization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3152--3162}
}
StyleMaster: Stylize Your Video with Artistic Generation and Translation: Zixuan Ye,

Huijuan Huang,

Xintao Wang,

Pengfei Wan,

Di Zhang,

Wenhan Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Zixuan and Huang, Huijuan and Wang, Xintao and Wan, Pengfei and Zhang, Di and Luo, Wenhan},
  title     = {{StyleMaster}: Stylize Your Video with Artistic Generation and Translation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2630--2640}
}
Unsupervised Continual Domain Shift Learning with Multi-Prototype Modeling: Haopeng Sun,

Yingwei Zhang,

Lumin Xu,

Sheng Jin,

Ping Luo,

Chen Qian,

Wentao Liu,

Yiqiang Chen; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Haopeng and Zhang, Yingwei and Xu, Lumin and Jin, Sheng and Luo, Ping and Qian, Chen and Liu, Wentao and Chen, Yiqiang},
  title     = {Unsupervised Continual Domain Shift Learning with Multi-Prototype Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10131--10141}
}
OmniGuard: Hybrid Manipulation Localization via Augmented Versatile Deep Image Watermarking: Xuanyu Zhang,

Zecheng Tang,

Zhipei Xu,

Runyi Li,

Youmin Xu,

Bin Chen,

Feng Gao,

Jian Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xuanyu and Tang, Zecheng and Xu, Zhipei and Li, Runyi and Xu, Youmin and Chen, Bin and Gao, Feng and Zhang, Jian},
  title     = {{OmniGuard}: Hybrid Manipulation Localization via Augmented Versatile Deep Image Watermarking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3008--3018}
}
Open-Canopy: Towards Very High Resolution Forest Monitoring: Fajwel Fogel,

Yohann Perron,

Nikola Besic,

Laurent Saint-André,

Agnès Pellissier-Tanon,

Martin Schwartz,

Thomas Boudras,

Ibrahim Fayad,

Alexandre d'Aspremont,

Loic Landrieu,

Philippe Ciais; [pdf] [supp]
[bibtex]
@InProceedings{Fogel_2025_CVPR,
  author    = {Fogel, Fajwel and Perron, Yohann and Besic, Nikola and Saint-Andr{\'e}, Laurent and Pellissier-Tanon, Agn{\`e}s and Schwartz, Martin and Boudras, Thomas and Fayad, Ibrahim and d'Aspremont, Alexandre and Landrieu, Loic and Ciais, Philippe},
  title     = {{Open-Canopy}: Towards Very High Resolution Forest Monitoring},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1395--1406}
}
Vision-Language Model IP Protection via Prompt-based Learning: Lianyu Wang,

Meng Wang,

Huazhu Fu,

Daoqiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Lianyu and Wang, Meng and Fu, Huazhu and Zhang, Daoqiang},
  title     = {Vision-Language Model {IP} Protection via Prompt-based Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9497--9506}
}
Kiss3DGen: Repurposing Image Diffusion Models for 3D Asset Generation: Jiantao Lin,

Xin Yang,

Meixi Chen,

Yingjie Xu,

Dongyu Yan,

Leyi Wu,

Xinli Xu,

Lie Xu,

Shunsi Zhang,

Ying-Cong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Jiantao and Yang, Xin and Chen, Meixi and Xu, Yingjie and Yan, Dongyu and Wu, Leyi and Xu, Xinli and Xu, Lie and Zhang, Shunsi and Chen, Ying-Cong},
  title     = {{Kiss3DGen}: Repurposing Image Diffusion Models for {3D} Asset Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5870--5880}
}
Koala-36M: A Large-scale Video Dataset Improving Consistency between Fine-grained Conditions and Video Content: Qiuheng Wang,

Yukai Shi,

Jiarong Ou,

Rui Chen,

Ke Lin,

Jiahao Wang,

Boyuan Jiang,

Haotian Yang,

Mingwu Zheng,

Xin Tao,

Fei Yang,

Pengfei Wan,

Di Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Qiuheng and Shi, Yukai and Ou, Jiarong and Chen, Rui and Lin, Ke and Wang, Jiahao and Jiang, Boyuan and Yang, Haotian and Zheng, Mingwu and Tao, Xin and Yang, Fei and Wan, Pengfei and Zhang, Di},
  title     = {{Koala-36M}: A Large-scale Video Dataset Improving Consistency between Fine-grained Conditions and Video Content},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8428--8437}
}
VASparse: Towards Efficient Visual Hallucination Mitigation via Visual-Aware Token Sparsification: Xianwei Zhuang,

Zhihong Zhu,

Yuxin Xie,

Liming Liang,

Yuexian Zou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuang_2025_CVPR,
  author    = {Zhuang, Xianwei and Zhu, Zhihong and Xie, Yuxin and Liang, Liming and Zou, Yuexian},
  title     = {{VASparse}: Towards Efficient Visual Hallucination Mitigation via Visual-Aware Token Sparsification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4189--4199}
}
SPARC: Score Prompting and Adaptive Fusion for Zero-Shot Multi-Label Recognition in Vision-Language Models: Kevin Miller,

Aditya Gangrade,

Samarth Mishra,

Kate Saenko,

Venkatesh Saligrama; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Miller_2025_CVPR,
  author    = {Miller, Kevin and Gangrade, Aditya and Mishra, Samarth and Saenko, Kate and Saligrama, Venkatesh},
  title     = {{SPARC}: Score Prompting and Adaptive Fusion for Zero-Shot Multi-Label Recognition in Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4313--4321}
}
Erase Diffusion: Empowering Object Removal Through Calibrating Diffusion Pathways: Yi Liu,

Hao Zhou,

Benlei Cui,

Wenxiang Shang,

Ran Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yi and Zhou, Hao and Cui, Benlei and Shang, Wenxiang and Lin, Ran},
  title     = {Erase Diffusion: Empowering Object Removal Through Calibrating Diffusion Pathways},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2418--2427}
}
Prompt-CAM: Making Vision Transformers Interpretable for Fine-Grained Analysis: Arpita Chowdhury,

Dipanjyoti Paul,

Zheda Mai,

Jianyang Gu,

Ziheng Zhang,

Kazi Sajeed Mehrab,

Elizabeth G. Campolongo,

Daniel Rubenstein,

Charles V. Stewart,

Anuj Karpatne,

Tanya Berger-Wolf,

Yu Su,

Wei-Lun Chao; [pdf] [supp]
[bibtex]
@InProceedings{Chowdhury_2025_CVPR,
  author    = {Chowdhury, Arpita and Paul, Dipanjyoti and Mai, Zheda and Gu, Jianyang and Zhang, Ziheng and Mehrab, Kazi Sajeed and Campolongo, Elizabeth G. and Rubenstein, Daniel and Stewart, Charles V. and Karpatne, Anuj and Berger-Wolf, Tanya and Su, Yu and Chao, Wei-Lun},
  title     = {{Prompt-CAM}: Making Vision Transformers Interpretable for Fine-Grained Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4375--4385}
}
Instruction-based Image Manipulation by Watching How Things Move: Mingdeng Cao,

Xuaner Zhang,

Yinqiang Zheng,

Zhihao Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Mingdeng and Zhang, Xuaner and Zheng, Yinqiang and Xia, Zhihao},
  title     = {Instruction-based Image Manipulation by Watching How Things Move},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2704--2713}
}
Ferret: An Efficient Online Continual Learning Framework under Varying Memory Constraints: Yuhao Zhou,

Yuxin Tian,

Jindi Lv,

Mingjia Shi,

Yuanxi Li,

Qing Ye,

Shuhao Zhang,

Jiancheng Lv; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Yuhao and Tian, Yuxin and Lv, Jindi and Shi, Mingjia and Li, Yuanxi and Ye, Qing and Zhang, Shuhao and Lv, Jiancheng},
  title     = {Ferret: An Efficient Online Continual Learning Framework under Varying Memory Constraints},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4850--4861}
}
VidComposition: Can MLLMs Analyze Compositions in Compiled Videos?: Yunlong Tang,

Junjia Guo,

Hang Hua,

Susan Liang,

Mingqian Feng,

Xinyang Li,

Rui Mao,

Chao Huang,

Jing Bi,

Zeliang Zhang,

Pooyan Fazli,

Chenliang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Yunlong and Guo, Junjia and Hua, Hang and Liang, Susan and Feng, Mingqian and Li, Xinyang and Mao, Rui and Huang, Chao and Bi, Jing and Zhang, Zeliang and Fazli, Pooyan and Xu, Chenliang},
  title     = {{VidComposition}: Can {MLLMs} Analyze Compositions in Compiled Videos?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8490--8500}
}
Self-Supervised Learning for Color Spike Camera Reconstruction: Yanchen Dong,

Ruiqin Xiong,

Xiaopeng Fan,

Zhaofei Yu,

Yonghong Tian,

Tiejun Huang; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Yanchen and Xiong, Ruiqin and Fan, Xiaopeng and Yu, Zhaofei and Tian, Yonghong and Huang, Tiejun},
  title     = {Self-Supervised Learning for Color Spike Camera Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6231--6240}
}
From Elements to Design: A Layered Approach for Automatic Graphic Design Composition: Jiawei Lin,

Shizhao Sun,

Danqing Huang,

Ting Liu,

Ji Li,

Jiang Bian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Jiawei and Sun, Shizhao and Huang, Danqing and Liu, Ting and Li, Ji and Bian, Jiang},
  title     = {From Elements to Design: A Layered Approach for Automatic Graphic Design Composition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8128--8137}
}
SALOVA: Segment-Augmented Long Video Assistant for Targeted Retrieval and Routing in Long-Form Video Analysis: Junho Kim,

Hyunjun Kim,

Hosu Lee,

Yong Man Ro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Junho and Kim, Hyunjun and Lee, Hosu and Ro, Yong Man},
  title     = {{SALOVA}: Segment-Augmented Long Video Assistant for Targeted Retrieval and Routing in Long-Form Video Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3352--3362}
}
DA-VPT: Semantic-Guided Visual Prompt Tuning for Vision Transformers: Li Ren,

Chen Chen,

Liqiang Wang,

Kien Hua; [pdf] [supp]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Li and Chen, Chen and Wang, Liqiang and Hua, Kien},
  title     = {{DA-VPT}: Semantic-Guided Visual Prompt Tuning for Vision Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4353--4363}
}
Towards Lossless Implicit Neural Representation via Bit Plane Decomposition: Woo Kyoung Han,

Byeonghun Lee,

Hyunmin Cho,

Sunghoon Im,

Kyong Hwan Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Woo Kyoung and Lee, Byeonghun and Cho, Hyunmin and Im, Sunghoon and Jin, Kyong Hwan},
  title     = {Towards Lossless Implicit Neural Representation via Bit Plane Decomposition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2269--2278}
}
iSegMan: Interactive Segment-and-Manipulate 3D Gaussians: Yian Zhao,

Wanshi Xu,

Ruochong Zheng,

Pengchong Qiao,

Chang Liu,

Jie Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Yian and Xu, Wanshi and Zheng, Ruochong and Qiao, Pengchong and Liu, Chang and Chen, Jie},
  title     = {{iSegMan}: Interactive Segment-and-Manipulate {3D} {Gaussians}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {661--670}
}
BlueLM-V-3B: Algorithm and System Co-Design for Multimodal Large Language Models on Mobile Devices: Xudong Lu,

Yinghao Chen,

Cheng Chen,

Hui Tan,

Boheng Chen,

Yina Xie,

Rui Hu,

Guanxin Tan,

Renshou Wu,

Yan Hu,

Yi Zeng,

Lei Wu,

Liuyang Bian,

Zhaoxiong Wang,

Long Liu,

Yanzhou Yang,

Han Xiao,

Aojun Zhou,

Yafei Wen,

Xiaoxin Chen,

Shuai Ren,

Hongsheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Xudong and Chen, Yinghao and Chen, Cheng and Tan, Hui and Chen, Boheng and Xie, Yina and Hu, Rui and Tan, Guanxin and Wu, Renshou and Hu, Yan and Zeng, Yi and Wu, Lei and Bian, Liuyang and Wang, Zhaoxiong and Liu, Long and Yang, Yanzhou and Xiao, Han and Zhou, Aojun and Wen, Yafei and Chen, Xiaoxin and Ren, Shuai and Li, Hongsheng},
  title     = {{BlueLM-V-3B}: Algorithm and System Co-Design for Multimodal Large Language Models on Mobile Devices},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4145--4155}
}
Unraveling Normal Anatomy via Fluid-Driven Anomaly Randomization: Peirong Liu,

Ana Lawry Aguila,

Juan E. Iglesias; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Peirong and Aguila, Ana Lawry and Iglesias, Juan E.},
  title     = {Unraveling Normal Anatomy via Fluid-Driven Anomaly Randomization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10455--10465}
}
Taming Teacher Forcing for Masked Autoregressive Video Generation: Deyu Zhou,

Quan Sun,

Yuang Peng,

Kun Yan,

Runpei Dong,

Duomin Wang,

Zheng Ge,

Nan Duan,

Xiangyu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Deyu and Sun, Quan and Peng, Yuang and Yan, Kun and Dong, Runpei and Wang, Duomin and Ge, Zheng and Duan, Nan and Zhang, Xiangyu},
  title     = {Taming Teacher Forcing for Masked Autoregressive Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7374--7384}
}
Revisiting Backdoor Attacks against Large Vision-Language Models from Domain Shift: Siyuan Liang,

Jiawei Liang,

Tianyu Pang,

Chao Du,

Aishan Liu,

Mingli Zhu,

Xiaochun Cao,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Siyuan and Liang, Jiawei and Pang, Tianyu and Du, Chao and Liu, Aishan and Zhu, Mingli and Cao, Xiaochun and Tao, Dacheng},
  title     = {Revisiting Backdoor Attacks against Large Vision-Language Models from Domain Shift},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9477--9486}
}
TCFG: Tangential Damping Classifier-free Guidance: Mingi Kwon,

Shin seong Kim,

Jaeseok Jeong,

Yi Ting Hsiao,

Youngjung Uh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kwon_2025_CVPR,
  author    = {Kwon, Mingi and Kim, Shin seong and Jeong, Jaeseok and Hsiao, Yi Ting and Uh, Youngjung},
  title     = {{TCFG}: Tangential Damping Classifier-free Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2620--2629}
}
MatAnyone: Stable Video Matting with Consistent Memory Propagation: Peiqing Yang,

Shangchen Zhou,

Jixin Zhao,

Qingyi Tao,

Chen Change Loy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Peiqing and Zhou, Shangchen and Zhao, Jixin and Tao, Qingyi and Loy, Chen Change},
  title     = {{MatAnyone}: Stable Video Matting with Consistent Memory Propagation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7299--7308}
}
Molmo and PixMo: Open Weights and Open Data for State-of-the-Art Vision-Language Models: Matt Deitke,

Christopher Clark,

Sangho Lee,

Rohun Tripathi,

Yue Yang,

Jae Sung Park,

Mohammadreza Salehi,

Niklas Muennighoff,

Kyle Lo,

Luca Soldaini,

Jiasen Lu,

Taira Anderson,

Erin Bransom,

Kiana Ehsani,

Huong Ngo,

YenSung Chen,

Ajay Patel,

Mark Yatskar,

Chris Callison-Burch,

Andrew Head,

Rose Hendrix,

Favyen Bastani,

Eli VanderBilt,

Nathan Lambert,

Yvonne Chou,

Arnavi Chheda,

Jenna Sparks,

Sam Skjonsberg,

Michael Schmitz,

Aaron Sarnat,

Byron Bischoff,

Pete Walsh,

Chris Newell,

Piper Wolters,

Tanmay Gupta,

Kuo-Hao Zeng,

Jon Borchardt,

Dirk Groeneveld,

Crystal Nam,

Sophie Lebrecht,

Caitlin Wittlif,

Carissa Schoenick,

Oscar Michel,

Ranjay Krishna,

Luca Weihs,

Noah A. Smith,

Hannaneh Hajishirzi,

Ross Girshick,

Ali Farhadi,

Aniruddha Kembhavi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deitke_2025_CVPR,
  author    = {Deitke, Matt and Clark, Christopher and Lee, Sangho and Tripathi, Rohun and Yang, Yue and Park, Jae Sung and Salehi, Mohammadreza and Muennighoff, Niklas and Lo, Kyle and Soldaini, Luca and Lu, Jiasen and Anderson, Taira and Bransom, Erin and Ehsani, Kiana and Ngo, Huong and Chen, YenSung and Patel, Ajay and Yatskar, Mark and Callison-Burch, Chris and Head, Andrew and Hendrix, Rose and Bastani, Favyen and VanderBilt, Eli and Lambert, Nathan and Chou, Yvonne and Chheda, Arnavi and Sparks, Jenna and Skjonsberg, Sam and Schmitz, Michael and Sarnat, Aaron and Bischoff, Byron and Walsh, Pete and Newell, Chris and Wolters, Piper and Gupta, Tanmay and Zeng, Kuo-Hao and Borchardt, Jon and Groeneveld, Dirk and Nam, Crystal and Lebrecht, Sophie and Wittlif, Caitlin and Schoenick, Carissa and Michel, Oscar and Krishna, Ranjay and Weihs, Luca and Smith, Noah A. and Hajishirzi, Hannaneh and Girshick, Ross and Farhadi, Ali and Kembhavi, Aniruddha},
  title     = {Molmo and {PixMo}: Open Weights and Open Data for State-of-the-Art Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {91--104}
}
MMTL-UniAD: A Unified Framework for Multimodal and Multi-Task Learning in Assistive Driving Perception: Wenzhuo Liu,

Wenshuo Wang,

Yicheng Qiao,

Qiannan Guo,

Jiayin Zhu,

Pengfei Li,

Zilong Chen,

Huiming Yang,

Zhiwei Li,

Lening Wang,

Tiao Tan,

Huaping Liu; [pdf]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Wenzhuo and Wang, Wenshuo and Qiao, Yicheng and Guo, Qiannan and Zhu, Jiayin and Li, Pengfei and Chen, Zilong and Yang, Huiming and Li, Zhiwei and Wang, Lening and Tan, Tiao and Liu, Huaping},
  title     = {{MMTL-UniAD}: A Unified Framework for Multimodal and Multi-Task Learning in Assistive Driving Perception},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6864--6874}
}
T2V-CompBench: A Comprehensive Benchmark for Compositional Text-to-video Generation: Kaiyue Sun,

Kaiyi Huang,

Xian Liu,

Yue Wu,

Zihan Xu,

Zhenguo Li,

Xihui Liu; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Kaiyue and Huang, Kaiyi and Liu, Xian and Wu, Yue and Xu, Zihan and Li, Zhenguo and Liu, Xihui},
  title     = {{T2V-CompBench}: A Comprehensive Benchmark for Compositional Text-to-video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8406--8416}
}
Multimodal Autoregressive Pre-training of Large Vision Encoders: Enrico Fini,

Mustafa Shukor,

Xiujun Li,

Philipp Dufter,

Michal Klein,

David Haldimann,

Sai Aitharaju,

Victor G. Turrisi da Costa,

Louis Béthune,

Zhe Gan,

Alexander Toshev,

Marcin Eichner,

Moin Nabi,

Yinfei Yang,

Joshua Susskind,

Alaaeldin El-Nouby; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fini_2025_CVPR,
  author    = {Fini, Enrico and Shukor, Mustafa and Li, Xiujun and Dufter, Philipp and Klein, Michal and Haldimann, David and Aitharaju, Sai and da Costa, Victor G. Turrisi and B{\'e}thune, Louis and Gan, Zhe and Toshev, Alexander and Eichner, Marcin and Nabi, Moin and Yang, Yinfei and Susskind, Joshua and El-Nouby, Alaaeldin},
  title     = {Multimodal Autoregressive Pre-training of Large Vision Encoders},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9641--9654}
}
AKiRa: Augmentation Kit on Rays for Optical Video Generation: Xi Wang,

Robin Courant,

Marc Christie,

Vicky Kalogeiton; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xi and Courant, Robin and Christie, Marc and Kalogeiton, Vicky},
  title     = {{AKiRa}: Augmentation Kit on Rays for Optical Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2609--2619}
}
TFCustom: Customized Image Generation with Time-Aware Frequency Feature Guidance: Mushui Liu,

Dong She,

Jingxuan Pang,

Qihan Huang,

Jiacheng Ying,

Wanggui He,

Yuanlei Hou,

Siming Fu; [pdf]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Mushui and She, Dong and Pang, Jingxuan and Huang, Qihan and Ying, Jiacheng and He, Wanggui and Hou, Yuanlei and Fu, Siming},
  title     = {{TFCustom}: Customized Image Generation with Time-Aware Frequency Feature Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2714--2723},
}
SketchFusion: Learning Universal Sketch Features through Fusing Foundation Models: Subhadeep Koley,

Tapas Kumar Dutta,

Aneeshan Sain,

Pinaki Nath Chowdhury,

Ayan Kumar Bhunia,

Yi-Zhe Song; [pdf] [arXiv]
[bibtex]
@InProceedings{Koley_2025_CVPR,
  author    = {Koley, Subhadeep and Dutta, Tapas Kumar and Sain, Aneeshan and Chowdhury, Pinaki Nath and Bhunia, Ayan Kumar and Song, Yi-Zhe},
  title     = {{SketchFusion}: Learning Universal Sketch Features through Fusing Foundation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2556--2567},
}
Bridging the Vision-Brain Gap with an Uncertainty-Aware Blur Prior: Haitao Wu,

Qing Li,

Changqing Zhang,

Zhen He,

Xiaomin Ying; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Haitao and Li, Qing and Zhang, Changqing and He, Zhen and Ying, Xiaomin},
  title     = {Bridging the Vision-Brain Gap with an Uncertainty-Aware Blur Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2246--2257},
}
AffordDP: Generalizable Diffusion Policy with Transferable Affordance: Shijie Wu,

Yihang Zhu,

Yunao Huang,

Kaizhen Zhu,

Jiayuan Gu,

Jingyi Yu,

Ye Shi,

Jingya Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Shijie and Zhu, Yihang and Huang, Yunao and Zhu, Kaizhen and Gu, Jiayuan and Yu, Jingyi and Shi, Ye and Wang, Jingya},
  title     = {{AffordDP}: Generalizable Diffusion Policy with Transferable Affordance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6971--6980},
}
HMAR: Efficient Hierarchical Masked Auto-Regressive Image Generation: Hermann Kumbong,

Xian Liu,

Tsung-Yi Lin,

Ming-Yu Liu,

Xihui Liu,

Ziwei Liu,

Daniel Y. Fu,

Christopher Re,

David W. Romero; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kumbong_2025_CVPR,
  author    = {Kumbong, Hermann and Liu, Xian and Lin, Tsung-Yi and Liu, Ming-Yu and Liu, Xihui and Liu, Ziwei and Fu, Daniel Y. and Re, Christopher and Romero, David W.},
  title     = {{HMAR}: Efficient Hierarchical Masked Auto-Regressive Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2535--2544},
}
DKC: Differentiated Knowledge Consolidation for Cloth-Hybrid Lifelong Person Re-identification: Zhenyu Cui,

Jiahuan Zhou,

Yuxin Peng; [pdf] [supp]
[bibtex]
@InProceedings{Cui_2025_CVPR,
  author    = {Cui, Zhenyu and Zhou, Jiahuan and Peng, Yuxin},
  title     = {{DKC}: Differentiated Knowledge Consolidation for Cloth-Hybrid Lifelong Person Re-identification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3573--3582},
}
Enhancing Facial Privacy Protection via Weakening Diffusion Purification: Ali Salar,

Qing Liu,

Yingli Tian,

Guoying Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Salar_2025_CVPR,
  author    = {Salar, Ali and Liu, Qing and Tian, Yingli and Zhao, Guoying},
  title     = {Enhancing Facial Privacy Protection via Weakening Diffusion Purification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8235--8244},
}
ORIDa: Object-centric Real-world Image Composition Dataset: Jinwoo Kim,

Sangmin Han,

Jinho Jeong,

Jiwoo Choi,

Dongyeoung Kim,

Seon Joo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Jinwoo and Han, Sangmin and Jeong, Jinho and Choi, Jiwoo and Kim, Dongyeoung and Kim, Seon Joo},
  title     = {{ORIDa}: Object-centric Real-world Image Composition Dataset},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3051--3060},
}
Image Generation Diversity Issues and How to Tame Them: Mischa Dombrowski,

Weitong Zhang,

Sarah Cechnicka,

Hadrien Reynaud,

Bernhard Kainz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dombrowski_2025_CVPR,
  author    = {Dombrowski, Mischa and Zhang, Weitong and Cechnicka, Sarah and Reynaud, Hadrien and Kainz, Bernhard},
  title     = {Image Generation Diversity Issues and How to Tame Them},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3029--3039},
}
Annotation Ambiguity Aware Semi-Supervised Medical Image Segmentation: Suruchi Kumari,

Pravendra Singh; [pdf] [supp]
[bibtex]
@InProceedings{Kumari_2025_CVPR,
  author    = {Kumari, Suruchi and Singh, Pravendra},
  title     = {Annotation Ambiguity Aware Semi-Supervised Medical Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10404--10413},
}
CAP4D: Creating Animatable 4D Portrait Avatars with Morphable Multi-View Diffusion Models: Felix Taubner,

Ruihang Zhang,

Mathieu Tuli,

David B. Lindell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Taubner_2025_CVPR,
  author    = {Taubner, Felix and Zhang, Ruihang and Tuli, Mathieu and Lindell, David B.},
  title     = {{CAP4D}: Creating Animatable {4D} Portrait Avatars with Morphable Multi-View Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5318--5330},
}
CORE4D: A 4D Human-Object-Human Interaction Dataset for Collaborative Object REarrangement: Yun Liu,

Chengwen Zhang,

Ruofan Xing,

Bingda Tang,

Bowen Yang,

Li Yi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yun and Zhang, Chengwen and Xing, Ruofan and Tang, Bingda and Yang, Bowen and Yi, Li},
  title     = {{CORE4D}: A {4D} Human-Object-Human Interaction Dataset for Collaborative Object {REarrangement}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1769--1782},
}
POp-GS: Next Best View in 3D-Gaussian Splatting with P-Optimality: Joey Wilson,

Marcelino Almeida,

Sachit Mahajan,

Martin Labrie,

Maani Ghaffari,

Omid Ghasemalizadeh,

Min Sun,

Cheng-Hao Kuo,

Arnab Sen; [pdf] [supp]
[bibtex]
@InProceedings{Wilson_2025_CVPR,
  author    = {Wilson, Joey and Almeida, Marcelino and Mahajan, Sachit and Labrie, Martin and Ghaffari, Maani and Ghasemalizadeh, Omid and Sun, Min and Kuo, Cheng-Hao and Sen, Arnab},
  title     = {{POp-GS}: Next Best View in {3D-Gaussian} Splatting with {P-Optimality}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3646--3655},
}
Critic-V: VLM Critics Help Catch VLM Errors in Multimodal Reasoning: Di Zhang,

Jingdi Lei,

Junxian Li,

Xunzhi Wang,

Yujie Liu,

Zonglin Yang,

Jiatong Li,

Weida Wang,

Suorong Yang,

Jianbo Wu,

Peng Ye,

Wanli Ouyang,

Dongzhan Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Di and Lei, Jingdi and Li, Junxian and Wang, Xunzhi and Liu, Yujie and Yang, Zonglin and Li, Jiatong and Wang, Weida and Yang, Suorong and Wu, Jianbo and Ye, Peng and Ouyang, Wanli and Zhou, Dongzhan},
  title     = {{Critic-V}: {VLM} Critics Help Catch {VLM} Errors in Multimodal Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9050--9061},
}
MaRI: Material Retrieval Integration across Domains: Jianhui Wang,

Zhifei Yang,

Yangfan He,

Huixiong Zhang,

Yuxuan Chen,

Jingwei Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jianhui and Yang, Zhifei and He, Yangfan and Zhang, Huixiong and Chen, Yuxuan and Huang, Jingwei},
  title     = {{MaRI}: Material Retrieval Integration across Domains},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5814--5823},
}
Q-Bench-Video: Benchmark the Video Quality Understanding of LMMs: Zicheng Zhang,

Ziheng Jia,

Haoning Wu,

Chunyi Li,

Zijian Chen,

Yingjie Zhou,

Wei Sun,

Xiaohong Liu,

Xiongkuo Min,

Weisi Lin,

Guangtao Zhai; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zicheng and Jia, Ziheng and Wu, Haoning and Li, Chunyi and Chen, Zijian and Zhou, Yingjie and Sun, Wei and Liu, Xiaohong and Min, Xiongkuo and Lin, Weisi and Zhai, Guangtao},
  title     = {{Q-Bench-Video}: Benchmark the Video Quality Understanding of {LMMs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3229--3239},
}
Glossy Object Reconstruction with Cost-effective Polarized Acquisition: Bojian Wu,

Yifan Peng,

Ruizhen Hu,

Xiaowei Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Bojian and Peng, Yifan and Hu, Ruizhen and Zhou, Xiaowei},
  title     = {Glossy Object Reconstruction with Cost-effective Polarized Acquisition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {422--431},
}
L-SWAG: Layer-Sample Wise Activation with Gradients Information for Zero-Shot NAS on Vision Transformers: Sofia Casarin,

Sergio Escalera,

Oswald Lanz; [pdf] [supp]
[bibtex]
@InProceedings{Casarin_2025_CVPR,
  author    = {Casarin, Sofia and Escalera, Sergio and Lanz, Oswald},
  title     = {{L-SWAG}: Layer-Sample Wise Activation with Gradients Information for Zero-Shot {NAS} on Vision Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4441--4451},
}
Commonsense Video Question Answering through Video-Grounded Entailment Tree Reasoning: Huabin Liu,

Filip Ilievski,

Cees G. M. Snoek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Huabin and Ilievski, Filip and Snoek, Cees G. M.},
  title     = {Commonsense Video Question Answering through Video-Grounded Entailment Tree Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3262--3271},
}
Lifelong Knowledge Editing for Vision Language Models with Low-Rank Mixture-of-Experts: Qizhou Chen,

Chengyu Wang,

Dakan Wang,

Taolin Zhang,

Wangyue Li,

Xiaofeng He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Qizhou and Wang, Chengyu and Wang, Dakan and Zhang, Taolin and Li, Wangyue and He, Xiaofeng},
  title     = {Lifelong Knowledge Editing for Vision Language Models with Low-Rank Mixture-of-Experts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9455--9466},
}
PartGen: Part-level 3D Generation and Reconstruction with Multi-view Diffusion Models: Minghao Chen,

Roman Shapovalov,

Iro Laina,

Tom Monnier,

Jianyuan Wang,

David Novotny,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Minghao and Shapovalov, Roman and Laina, Iro and Monnier, Tom and Wang, Jianyuan and Novotny, David and Vedaldi, Andrea},
  title     = {{PartGen}: Part-level {3D} Generation and Reconstruction with Multi-view Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5881--5892},
}
SINR: Sparsity Driven Compressed Implicit Neural Representations: Dhananjaya Jayasundara,

Sudarshan Rajagopalan,

Yasiru Ranasinghe,

Trac D. Tran,

Vishal M. Patel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jayasundara_2025_CVPR,
  author    = {Jayasundara, Dhananjaya and Rajagopalan, Sudarshan and Ranasinghe, Yasiru and Tran, Trac D. and Patel, Vishal M.},
  title     = {{SINR}: Sparsity Driven Compressed Implicit Neural Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3061--3070},
}
ManipTrans: Efficient Dexterous Bimanual Manipulation Transfer via Residual Learning: Kailin Li,

Puhao Li,

Tengyu Liu,

Yuyang Li,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Kailin and Li, Puhao and Liu, Tengyu and Li, Yuyang and Huang, Siyuan},
  title     = {{ManipTrans}: Efficient Dexterous Bimanual Manipulation Transfer via Residual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6991--7003},
}
Shining Yourself: High-Fidelity Ornaments Virtual Try-on with Diffusion Model: Yingmao Miao,

Zhanpeng Huang,

Rui Han,

Zibin Wang,

Chenhao Lin,

Chao Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Miao_2025_CVPR,
  author    = {Miao, Yingmao and Huang, Zhanpeng and Han, Rui and Wang, Zibin and Lin, Chenhao and Shen, Chao},
  title     = {Shining Yourself: High-Fidelity Ornaments Virtual Try-on with Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {359--368},
}
Universal Domain Adaptation for Semantic Segmentation: Seun-An Choe,

Keon-Hee Park,

Jinwoo Choi,

Gyeong-Moon Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choe_2025_CVPR,
  author    = {Choe, Seun-An and Park, Keon-Hee and Choi, Jinwoo and Park, Gyeong-Moon},
  title     = {Universal Domain Adaptation for Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4607--4617},
}
HyperGS: Hyperspectral 3D Gaussian Splatting: Christopher Thirgood,

Oscar Mendez,

Erin Ling,

Jon Storey,

Simon Hadfield; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thirgood_2025_CVPR,
  author    = {Thirgood, Christopher and Mendez, Oscar and Ling, Erin and Storey, Jon and Hadfield, Simon},
  title     = {{HyperGS}: Hyperspectral {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5970--5979},
}
LMO: Linear Mamba Operator for MRI Reconstruction: Wei Li,

Jiawei Jiang,

Jie Wu,

Kaihao Yu,

Jianwei Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Wei and Jiang, Jiawei and Wu, Jie and Yu, Kaihao and Zheng, Jianwei},
  title     = {{LMO}: Linear {Mamba} Operator for {MRI} Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5112--5122},
}
AnomalyNCD: Towards Novel Anomaly Class Discovery in Industrial Scenarios: Ziming Huang,

Xurui Li,

Haotian Liu,

Feng Xue,

Yuzhe Wang,

Yu Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Ziming and Li, Xurui and Liu, Haotian and Xue, Feng and Wang, Yuzhe and Zhou, Yu},
  title     = {{AnomalyNCD}: Towards Novel Anomaly Class Discovery in Industrial Scenarios},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4755--4765},
}
Your Large Vision-Language Model Only Needs A Few Attention Heads For Visual Grounding: Seil Kang,

Jinyeong Kim,

Junhyeok Kim,

Seong Jae Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_CVPR,
  author    = {Kang, Seil and Kim, Jinyeong and Kim, Junhyeok and Hwang, Seong Jae},
  title     = {Your Large Vision-Language Model Only Needs A Few Attention Heads For Visual Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9339--9350},
}
Ges3ViG: Incorporating Pointing Gestures into Language-Based 3D Visual Grounding for Embodied Reference Understanding: Atharv Mahesh Mane,

Dulanga Weerakoon,

Vigneshwaran Subbaraju,

Sougata Sen,

Sanjay E. Sarma,

Archan Misra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mane_2025_CVPR,
  author    = {Mane, Atharv Mahesh and Weerakoon, Dulanga and Subbaraju, Vigneshwaran and Sen, Sougata and Sarma, Sanjay E. and Misra, Archan},
  title     = {{Ges3ViG}: Incorporating Pointing Gestures into Language-Based {3D} Visual Grounding for Embodied Reference Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9017--9026},
}
Progressive Focused Transformer for Single Image Super-Resolution: Wei Long,

Xingyu Zhou,

Leheng Zhang,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Long_2025_CVPR,
  author    = {Long, Wei and Zhou, Xingyu and Zhang, Leheng and Gu, Shuhang},
  title     = {Progressive Focused Transformer for Single Image Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2279--2288},
}
VladVA: Discriminative Fine-tuning of LVLMs: Yassine Ouali,

Adrian Bulat,

Alexandros Xenos,

Anestis Zaganidis,

Ioannis Maniadis Metaxas,

Brais Martinez,

Georgios Tzimiropoulos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ouali_2025_CVPR,
  author    = {Ouali, Yassine and Bulat, Adrian and Xenos, Alexandros and Zaganidis, Anestis and Metaxas, Ioannis Maniadis and Martinez, Brais and Tzimiropoulos, Georgios},
  title     = {{VladVA}: Discriminative Fine-tuning of {LVLMs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4101--4111},
}
HumanMM: Global Human Motion Recovery from Multi-shot Videos: Yuhong Zhang,

Guanlin Wu,

Ling-Hao Chen,

Zhuokai Zhao,

Jing Lin,

Xiaoke Jiang,

Jiamin Wu,

Zhuoheng Li,

Hao Frank Yang,

Haoqian Wang,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yuhong and Wu, Guanlin and Chen, Ling-Hao and Zhao, Zhuokai and Lin, Jing and Jiang, Xiaoke and Wu, Jiamin and Li, Zhuoheng and Yang, Hao Frank and Wang, Haoqian and Zhang, Lei},
  title     = {{HumanMM}: Global Human Motion Recovery from Multi-shot Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1973--1983},
}
Removing Reflections from RAW Photos: Eric Kee,

Adam Pikielny,

Kevin Blackburn-Matzen,

Marc Levoy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kee_2025_CVPR,
  author    = {Kee, Eric and Pikielny, Adam and Blackburn-Matzen, Kevin and Levoy, Marc},
  title     = {Removing Reflections from {RAW} Photos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {161--171},
}
AMR-Transformer: Enabling Efficient Long-range Interaction for Complex Neural Fluid Simulation: Zeyi Xu,

Jinfan Liu,

Kuangxu Chen,

Ye Chen,

Zhangli Hu,

Bingbing Ni; [pdf]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Zeyi and Liu, Jinfan and Chen, Kuangxu and Chen, Ye and Hu, Zhangli and Ni, Bingbing},
  title     = {{AMR-Transformer}: Enabling Efficient Long-range Interaction for Complex Neural Fluid Simulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5804--5813},
}
Blurry-Edges: Photon-Limited Depth Estimation from Defocused Boundaries: Wei Xu,

Charles James Wagner,

Junjie Luo,

Qi Guo; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Wei and Wagner, Charles James and Luo, Junjie and Guo, Qi},
  title     = {{Blurry-Edges}: Photon-Limited Depth Estimation from Defocused Boundaries},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {432--441},
}
MICAS: Multi-grained In-Context Adaptive Sampling for 3D Point Cloud Processing: Feifei Shao,

Ping Liu,

Zhao Wang,

Yawei Luo,

Hongwei Wang,

Jun Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2025_CVPR,
  author    = {Shao, Feifei and Liu, Ping and Wang, Zhao and Luo, Yawei and Wang, Hongwei and Xiao, Jun},
  title     = {{MICAS}: Multi-grained In-Context Adaptive Sampling for {3D} Point Cloud Processing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6616--6626},
}
GoalFlow: Goal-Driven Flow Matching for Multimodal Trajectories Generation in End-to-End Autonomous Driving: Zebin Xing,

Xingyu Zhang,

Yang Hu,

Bo Jiang,

Tong He,

Qian Zhang,

Xiaoxiao Long,

Wei Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Zebin and Zhang, Xingyu and Hu, Yang and Jiang, Bo and He, Tong and Zhang, Qian and Long, Xiaoxiao and Yin, Wei},
  title     = {{GoalFlow}: Goal-Driven Flow Matching for Multimodal Trajectories Generation in End-to-End Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1602--1611},
}
ColabSfM: Collaborative Structure-from-Motion by Point Cloud Registration: Johan Edstedt,

André Mateus,

Alberto Jaenal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Edstedt_2025_CVPR,
  author    = {Edstedt, Johan and Mateus, Andr{\'e} and Jaenal, Alberto},
  title     = {{ColabSfM}: Collaborative Structure-from-Motion by Point Cloud Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6573--6583},
}
MangaNinja: Line Art Colorization with Precise Reference Following: Zhiheng Liu,

Ka Leong Cheng,

Xi Chen,

Jie Xiao,

Hao Ouyang,

Kai Zhu,

Yu Liu,

Yujun Shen,

Qifeng Chen,

Ping Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhiheng and Cheng, Ka Leong and Chen, Xi and Xiao, Jie and Ouyang, Hao and Zhu, Kai and Liu, Yu and Shen, Yujun and Chen, Qifeng and Luo, Ping},
  title     = {{MangaNinja}: Line Art Colorization with Precise Reference Following},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5666--5677},
}
Nonisotropic Gaussian Diffusion for Realistic 3D Human Motion Prediction: Cecilia Curreli,

Dominik Muhle,

Abhishek Saroha,

Zhenzhang Ye,

Riccardo Marin,

Daniel Cremers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Curreli_2025_CVPR,
  author    = {Curreli, Cecilia and Muhle, Dominik and Saroha, Abhishek and Ye, Zhenzhang and Marin, Riccardo and Cremers, Daniel},
  title     = {Nonisotropic {Gaussian} Diffusion for Realistic {3D} Human Motion Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1871--1882},
}
PICO: Reconstructing 3D People In Contact with Objects: Alpár Cseke,

Shashank Tripathi,

Sai Kumar Dwivedi,

Arjun S. Lakshmipathy,

Agniv Chatterjee,

Michael J. Black,

Dimitrios Tzionas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cseke_2025_CVPR,
  author    = {Cseke, Alp{\'a}r and Tripathi, Shashank and Dwivedi, Sai Kumar and Lakshmipathy, Arjun S. and Chatterjee, Agniv and Black, Michael J. and Tzionas, Dimitrios},
  title     = {{PICO}: Reconstructing {3D} People In Contact with Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1783--1794},
}
Linguistics-aware Masked Image Modeling for Self-supervised Scene Text Recognition: Yifei Zhang,

Chang Liu,

Jin Wei,

Xiaomeng Yang,

Yu Zhou,

Can Ma,

Xiangyang Ji; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yifei and Liu, Chang and Wei, Jin and Yang, Xiaomeng and Zhou, Yu and Ma, Can and Ji, Xiangyang},
  title     = {Linguistics-aware Masked Image Modeling for Self-supervised Scene Text Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9318--9328},
}
Scaling up Image Segmentation across Data and Tasks: Pei Wang,

Zhaowei Cai,

Hao Yang,

Ashwin Swaminathan,

R. Manmatha,

Stefano Soatto; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Pei and Cai, Zhaowei and Yang, Hao and Swaminathan, Ashwin and Manmatha, R. and Soatto, Stefano},
  title     = {Scaling up Image Segmentation across Data and Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4573--4583},
}
Bridging Past and Future: End-to-End Autonomous Driving with Historical Prediction and Planning: Bozhou Zhang,

Nan Song,

Xin Jin,

Li Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Bozhou and Song, Nan and Jin, Xin and Zhang, Li},
  title     = {Bridging Past and Future: End-to-End Autonomous Driving with Historical Prediction and Planning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6854--6863},
}
Blood Flow Speed Estimation with Optical Coherence Tomography Angiography Images: Wensheng Cheng,

Zhenghong Li,

Jiaxiang Ren,

Hyomin Jeong,

Congwu Du,

Yingtian Pan,

Haibin Ling; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Wensheng and Li, Zhenghong and Ren, Jiaxiang and Jeong, Hyomin and Du, Congwu and Pan, Yingtian and Ling, Haibin},
  title     = {Blood Flow Speed Estimation with Optical Coherence Tomography Angiography Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10466--10475},
}
DreamTrack: Dreaming the Future for Multimodal Visual Object Tracking: Mingzhe Guo,

Weiping Tan,

Wenyu Ran,

Liping Jing,

Zhipeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Mingzhe and Tan, Weiping and Ran, Wenyu and Jing, Liping and Zhang, Zhipeng},
  title     = {{DreamTrack}: Dreaming the Future for Multimodal Visual Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7201--7210},
}
OmniStyle: Filtering High Quality Style Transfer Data at Scale: Ye Wang,

Ruiqi Liu,

Jiang Lin,

Fei Liu,

Zili Yi,

Yilin Wang,

Rui Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Ye and Liu, Ruiqi and Lin, Jiang and Liu, Fei and Yi, Zili and Wang, Yilin and Ma, Rui},
  title     = {{OmniStyle}: Filtering High Quality Style Transfer Data at Scale},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7847--7856},
}
Cross-View Completion Models are Zero-shot Correspondence Estimators: Honggyu An,

Jin Hyeon Kim,

Seonghoon Park,

Jaewoo Jung,

Jisang Han,

Sunghwan Hong,

Seungryong Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2025_CVPR,
  author    = {An, Honggyu and Kim, Jin Hyeon and Park, Seonghoon and Jung, Jaewoo and Han, Jisang and Hong, Sunghwan and Kim, Seungryong},
  title     = {Cross-View Completion Models are Zero-shot Correspondence Estimators},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1103--1115},
}
Multi-party Collaborative Attention Control for Image Customization: Han Yang,

Chuanguang Yang,

Qiuli Wang,

Zhulin An,

Weilun Feng,

Libo Huang,

Yongjun Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Han and Yang, Chuanguang and Wang, Qiuli and An, Zhulin and Feng, Weilun and Huang, Libo and Xu, Yongjun},
  title     = {Multi-party Collaborative Attention Control for Image Customization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7942--7951},
}
HOT3D: Hand and Object Tracking in 3D from Egocentric Multi-View Videos: Prithviraj Banerjee,

Sindi Shkodrani,

Pierre Moulon,

Shreyas Hampali,

Shangchen Han,

Fan Zhang,

Linguang Zhang,

Jade Fountain,

Edward Miller,

Selen Basol,

Richard Newcombe,

Robert Wang,

Jakob Julian Engel,

Tomas Hodan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Banerjee_2025_CVPR,
  author    = {Banerjee, Prithviraj and Shkodrani, Sindi and Moulon, Pierre and Hampali, Shreyas and Han, Shangchen and Zhang, Fan and Zhang, Linguang and Fountain, Jade and Miller, Edward and Basol, Selen and Newcombe, Richard and Wang, Robert and Engel, Jakob Julian and Hodan, Tomas},
  title     = {{HOT3D}: Hand and Object Tracking in {3D} from Egocentric Multi-View Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7061--7071},
}
DELT: A Simple Diversity-driven EarlyLate Training for Dataset Distillation: Zhiqiang Shen,

Ammar Sherif,

Zeyuan Yin,

Shitong Shao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Zhiqiang and Sherif, Ammar and Yin, Zeyuan and Shao, Shitong},
  title     = {{DELT}: A Simple Diversity-driven {EarlyLate} Training for Dataset Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4797--4806},
}
RoboBrain: A Unified Brain Model for Robotic Manipulation from Abstract to Concrete: Yuheng Ji,

Huajie Tan,

Jiayu Shi,

Xiaoshuai Hao,

Yuan Zhang,

Hengyuan Zhang,

Pengwei Wang,

Mengdi Zhao,

Yao Mu,

Pengju An,

Xinda Xue,

Qinghang Su,

Huaihai Lyu,

Xiaolong Zheng,

Jiaming Liu,

Zhongyuan Wang,

Shanghang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2025_CVPR,
  author    = {Ji, Yuheng and Tan, Huajie and Shi, Jiayu and Hao, Xiaoshuai and Zhang, Yuan and Zhang, Hengyuan and Wang, Pengwei and Zhao, Mengdi and Mu, Yao and An, Pengju and Xue, Xinda and Su, Qinghang and Lyu, Huaihai and Zheng, Xiaolong and Liu, Jiaming and Wang, Zhongyuan and Zhang, Shanghang},
  title     = {{RoboBrain}: A Unified Brain Model for Robotic Manipulation from Abstract to Concrete},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1724--1734},
}
Beyond Image Classification: A Video Benchmark and Dual-Branch Hybrid Discrimination Framework for Compositional Zero-Shot Learning: Dongyao Jiang,

Haodong Jing,

Yongqiang Ma,

Nanning Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Dongyao and Jing, Haodong and Ma, Yongqiang and Zheng, Nanning},
  title     = {Beyond Image Classification: A Video Benchmark and Dual-Branch Hybrid Discrimination Framework for Compositional Zero-Shot Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9860--9869},
}
ABBSPO: Adaptive Bounding Box Scaling and Symmetric Prior based Orientation Prediction for Detecting Aerial Image Objects: Woojin Lee,

Hyugjae Chang,

Jaeho Moon,

Jaehyup Lee,

Munchurl Kim; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Woojin and Chang, Hyugjae and Moon, Jaeho and Lee, Jaehyup and Kim, Munchurl},
  title     = {{ABBSPO}: Adaptive Bounding Box Scaling and Symmetric Prior based Orientation Prediction for Detecting Aerial Image Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8848--8858},
}
Do Your Best and Get Enough Rest for Continual Learning: Hankyul Kang,

Gregor Seifer,

Donghyun Lee,

Jongbin Ryu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_CVPR,
  author    = {Kang, Hankyul and Seifer, Gregor and Lee, Donghyun and Ryu, Jongbin},
  title     = {Do Your Best and Get Enough Rest for Continual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10077--10086},
}
Enhancing Few-Shot Class-Incremental Learning via Training-Free Bi-Level Modality Calibration: Yiyang Chen,

Tianyu Ding,

Lei Wang,

Jing Huo,

Yang Gao,

Wenbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Yiyang and Ding, Tianyu and Wang, Lei and Huo, Jing and Gao, Yang and Li, Wenbin},
  title     = {Enhancing Few-Shot Class-Incremental Learning via Training-Free Bi-Level Modality Calibration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9881--9890}
}
MUSt3R: Multi-view Network for Stereo 3D Reconstruction: Yohann Cabon,

Lucas Stoffl,

Leonid Antsfeld,

Gabriela Csurka,

Boris Chidlovskii,

Jerome Revaud,

Vincent Leroy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cabon_2025_CVPR,
  author    = {Cabon, Yohann and Stoffl, Lucas and Antsfeld, Leonid and Csurka, Gabriela and Chidlovskii, Boris and Revaud, Jerome and Leroy, Vincent},
  title     = {{MUSt3R}: Multi-view Network for Stereo {3D} Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1050--1060}
}
Hybrid-Level Instruction Injection for Video Token Compression in Multi-modal Large Language Models: Zhihang Liu,

Chen-Wei Xie,

Pandeng Li,

Liming Zhao,

Longxiang Tang,

Yun Zheng,

Chuanbin Liu,

Hongtao Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhihang and Xie, Chen-Wei and Li, Pandeng and Zhao, Liming and Tang, Longxiang and Zheng, Yun and Liu, Chuanbin and Xie, Hongtao},
  title     = {Hybrid-Level Instruction Injection for Video Token Compression in Multi-modal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8568--8578}
}
A New Statistical Model of Star Speckles for Learning to Detect and Characterize Exoplanets in Direct Imaging Observations: Théo Bodrito,

Olivier Flasseur,

Julien Mairal,

Jean Ponce,

Maud Langlois,

Anne-Marie Lagrange; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bodrito_2025_CVPR,
  author    = {Bodrito, Th{\'e}o and Flasseur, Olivier and Mairal, Julien and Ponce, Jean and Langlois, Maud and Lagrange, Anne-Marie},
  title     = {A New Statistical Model of Star Speckles for Learning to Detect and Characterize Exoplanets in Direct Imaging Observations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1230--1240}
}
CoT-VLA: Visual Chain-of-Thought Reasoning for Vision-Language-Action Models: Qingqing Zhao,

Yao Lu,

Moo Jin Kim,

Zipeng Fu,

Zhuoyang Zhang,

Yecheng Wu,

Zhaoshuo Li,

Qianli Ma,

Song Han,

Chelsea Finn,

Ankur Handa,

Tsung-Yi Lin,

Gordon Wetzstein,

Ming-Yu Liu,

Donglai Xiang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Qingqing and Lu, Yao and Kim, Moo Jin and Fu, Zipeng and Zhang, Zhuoyang and Wu, Yecheng and Li, Zhaoshuo and Ma, Qianli and Han, Song and Finn, Chelsea and Handa, Ankur and Lin, Tsung-Yi and Wetzstein, Gordon and Liu, Ming-Yu and Xiang, Donglai},
  title     = {{CoT-VLA}: Visual Chain-of-Thought Reasoning for Vision-Language-Action Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1702--1713}
}
WAVE: Weight Templates for Adaptive Initialization of Variable-sized Models: Fu Feng,

Yucheng Xie,

Jing Wang,

Xin Geng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Fu and Xie, Yucheng and Wang, Jing and Geng, Xin},
  title     = {{WAVE}: Weight Templates for Adaptive Initialization of Variable-sized Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4819--4828}
}
CXPMRG-Bench: Pre-training and Benchmarking for X-ray Medical Report Generation on CheXpert Plus Dataset: Xiao Wang,

Fuling Wang,

Yuehang Li,

Qingchuan Ma,

Shiao Wang,

Bo Jiang,

Jin Tang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xiao and Wang, Fuling and Li, Yuehang and Ma, Qingchuan and Wang, Shiao and Jiang, Bo and Tang, Jin},
  title     = {{CXPMRG-Bench}: Pre-training and Benchmarking for {X-ray} Medical Report Generation on {CheXpert Plus} Dataset},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5123--5133}
}
Event-Equalized Dense Video Captioning: Kangyi Wu,

Pengna Li,

Jingwen Fu,

Yizhe Li,

Yang Wu,

Yuhan Liu,

Jinjun Wang,

Sanping Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Kangyi and Li, Pengna and Fu, Jingwen and Li, Yizhe and Wu, Yang and Liu, Yuhan and Wang, Jinjun and Zhou, Sanping},
  title     = {Event-Equalized Dense Video Captioning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8417--8427}
}
EDCFlow: Exploring Temporally Dense Difference Maps for Event-based Optical Flow Estimation: Daikun Liu,

Lei Cheng,

Teng Wang,

Changyin Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Daikun and Cheng, Lei and Wang, Teng and Sun, Changyin},
  title     = {{EDCFlow}: Exploring Temporally Dense Difference Maps for Event-based Optical Flow Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1984--1993}
}
LibraGrad: Balancing Gradient Flow for Universally Better Vision Transformer Attributions: Faridoun Mehri,

Mahdieh Soleymani Baghshah,

Mohammad Taher Pilehvar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mehri_2025_CVPR,
  author    = {Mehri, Faridoun and Baghshah, Mahdieh Soleymani and Pilehvar, Mohammad Taher},
  title     = {{LibraGrad}: Balancing Gradient Flow for Universally Better Vision Transformer Attributions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {67--78}
}
Lost in Translation, Found in Context: Sign Language Translation with Contextual Cues: Youngjoon Jang,

Haran Raajesh,

Liliane Momeni,

Gül Varol,

Andrew Zisserman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2025_CVPR,
  author    = {Jang, Youngjoon and Raajesh, Haran and Momeni, Liliane and Varol, G{\"u}l and Zisserman, Andrew},
  title     = {Lost in Translation, Found in Context: Sign Language Translation with Contextual Cues},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8742--8752}
}
Synchronized Video-to-Audio Generation via Mel Quantization-Continuum Decomposition: Juncheng Wang,

Chao Xu,

Cheng Yu,

Lei Shang,

Zhe Hu,

Shujun Wang,

Liefeng Bo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Juncheng and Xu, Chao and Yu, Cheng and Shang, Lei and Hu, Zhe and Wang, Shujun and Bo, Liefeng},
  title     = {Synchronized Video-to-Audio Generation via Mel Quantization-Continuum Decomposition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3111--3120}
}
FATE: Full-head Gaussian Avatar with Textural Editing from Monocular Video: Jiawei Zhang,

Zijian Wu,

Zhiyang Liang,

Yicheng Gong,

Dongfang Hu,

Yao Yao,

Xun Cao,

Hao Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jiawei and Wu, Zijian and Liang, Zhiyang and Gong, Yicheng and Hu, Dongfang and Yao, Yao and Cao, Xun and Zhu, Hao},
  title     = {{FATE}: Full-head {Gaussian} Avatar with Textural Editing from Monocular Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5535--5545}
}
Touch2Shape: Touch-Conditioned 3D Diffusion for Shape Exploration and Reconstruction: Yuanbo Wang,

Zhaoxuan Zhang,

Jiajin Qiu,

Dilong Sun,

Zhengyu Meng,

Xiaopeng Wei,

Xin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuanbo and Zhang, Zhaoxuan and Qiu, Jiajin and Sun, Dilong and Meng, Zhengyu and Wei, Xiaopeng and Yang, Xin},
  title     = {{Touch2Shape}: Touch-Conditioned {3D} Diffusion for Shape Exploration and Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5656--5665}
}
VITED: Video Temporal Evidence Distillation: Yujie Lu,

Yale Song,

William Wang,

Lorenzo Torresani,

Tushar Nagarajan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Yujie and Song, Yale and Wang, William and Torresani, Lorenzo and Nagarajan, Tushar},
  title     = {{VITED}: Video Temporal Evidence Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8501--8511}
}
Temporal Score Analysis for Understanding and Correcting Diffusion Artifacts: Yu Cao,

Zengqun Zhao,

Ioannis Patras,

Shaogang Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Yu and Zhao, Zengqun and Patras, Ioannis and Gong, Shaogang},
  title     = {Temporal Score Analysis for Understanding and Correcting Diffusion Artifacts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7707--7716}
}
Go-with-the-Flow: Motion-Controllable Video Diffusion Models Using Real-Time Warped Noise: Ryan Burgert,

Yuancheng Xu,

Wenqi Xian,

Oliver Pilarski,

Pascal Clausen,

Mingming He,

Li Ma,

Yitong Deng,

Lingxiao Li,

Mohsen Mousavi,

Michael Ryoo,

Paul Debevec,

Ning Yu; [pdf] [supp]
[bibtex]
@InProceedings{Burgert_2025_CVPR,
  author    = {Burgert, Ryan and Xu, Yuancheng and Xian, Wenqi and Pilarski, Oliver and Clausen, Pascal and He, Mingming and Ma, Li and Deng, Yitong and Li, Lingxiao and Mousavi, Mohsen and Ryoo, Michael and Debevec, Paul and Yu, Ning},
  title     = {{Go-with-the-Flow}: Motion-Controllable Video Diffusion Models Using Real-Time Warped Noise},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13--23}
}
Vision-Language Gradient Descent-driven All-in-One Deep Unfolding Networks: Haijin Zeng,

Xiangming Wang,

Yongyong Chen,

Jingyong Su,

Jie Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_CVPR,
  author    = {Zeng, Haijin and Wang, Xiangming and Chen, Yongyong and Su, Jingyong and Liu, Jie},
  title     = {Vision-Language Gradient Descent-driven All-in-One Deep Unfolding Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7524--7533}
}
3D-LLaVA: Towards Generalist 3D LMMs with Omni Superpoint Transformer: Jiajun Deng,

Tianyu He,

Li Jiang,

Tianyu Wang,

Feras Dayoub,

Ian Reid; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Jiajun and He, Tianyu and Jiang, Li and Wang, Tianyu and Dayoub, Feras and Reid, Ian},
  title     = {{3D-LLaVA}: Towards Generalist {3D} {LMMs} with Omni Superpoint Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3772--3782}
}
Boost the Inference with Co-training: A Depth-guided Mutual Learning Framework for Semi-supervised Medical Polyp Segmentation: Yuxin Li,

Zihao Zhu,

Yuxiang Zhang,

Yifan Chen,

Zhibin Yu; [pdf]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yuxin and Zhu, Zihao and Zhang, Yuxiang and Chen, Yifan and Yu, Zhibin},
  title     = {Boost the Inference with Co-training: A Depth-guided Mutual Learning Framework for Semi-supervised Medical Polyp Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10394--10403}
}
From Laboratory to Real World: A New Benchmark Towards Privacy-Preserved Visible-Infrared Person Re-Identification: Yan Jiang,

Hao Yu,

Xu Cheng,

Haoyu Chen,

Zhaodong Sun,

Guoying Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Yan and Yu, Hao and Cheng, Xu and Chen, Haoyu and Sun, Zhaodong and Zhao, Guoying},
  title     = {From Laboratory to Real World: A New Benchmark Towards Privacy-Preserved Visible-Infrared Person Re-Identification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8828--8837}
}
4Deform: Neural Surface Deformation for Robust Shape Interpolation: Lu Sang,

Zehranaz Canfes,

Dongliang Cao,

Riccardo Marin,

Florian Bernard,

Daniel Cremers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sang_2025_CVPR,
  author    = {Sang, Lu and Canfes, Zehranaz and Cao, Dongliang and Marin, Riccardo and Bernard, Florian and Cremers, Daniel},
  title     = {{4Deform}: Neural Surface Deformation for Robust Shape Interpolation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6542--6551}
}
Dense Match Summarization for Faster Two-view Estimation: Jonathan Astermark,

Anders Heyden,

Viktor Larsson; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Astermark_2025_CVPR,
  author    = {Astermark, Jonathan and Heyden, Anders and Larsson, Viktor},
  title     = {Dense Match Summarization for Faster Two-view Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1093--1102}
}
Align-A-Video: Deterministic Reward Tuning of Image Diffusion Models for Consistent Video Editing: Shengzhi Wang,

Yingkang Zhong,

Jiangchuan Mu,

Kai Wu,

Mingliang Xiong,

Wen Fang,

Mingqing Liu,

Hao Deng,

Bin He,

Gang Li,

Qingwen Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shengzhi and Zhong, Yingkang and Mu, Jiangchuan and Wu, Kai and Xiong, Mingliang and Fang, Wen and Liu, Mingqing and Deng, Hao and He, Bin and Li, Gang and Liu, Qingwen},
  title     = {{Align-A-Video}: Deterministic Reward Tuning of Image Diffusion Models for Consistent Video Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2074--2083}
}
LION-FS: Fast & Slow Video-Language Thinker as Online Video Assistant: Wei Li,

Bing Hu,

Rui Shao,

Leyang Shen,

Liqiang Nie; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Wei and Hu, Bing and Shao, Rui and Shen, Leyang and Nie, Liqiang},
  title     = {{LION-FS}: Fast \& Slow Video-Language Thinker as Online Video Assistant},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3240--3251}
}
Motion-Grounded Video Reasoning: Understanding and Perceiving Motion at Pixel Level: Andong Deng,

Tongjia Chen,

Shoubin Yu,

Taojiannan Yang,

Lincoln Spencer,

Yapeng Tian,

Ajmal Saeed Mian,

Mohit Bansal,

Chen Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Andong and Chen, Tongjia and Yu, Shoubin and Yang, Taojiannan and Spencer, Lincoln and Tian, Yapeng and Mian, Ajmal Saeed and Bansal, Mohit and Chen, Chen},
  title     = {Motion-Grounded Video Reasoning: Understanding and Perceiving Motion at Pixel Level},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8625--8636}
}
Toward Robust Neural Reconstruction from Sparse Point Sets: Amine Ouasfi,

Shubhendu Jena,

Eric Marchand,

Adnane Boukhayma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ouasfi_2025_CVPR,
  author    = {Ouasfi, Amine and Jena, Shubhendu and Marchand, Eric and Boukhayma, Adnane},
  title     = {Toward Robust Neural Reconstruction from Sparse Point Sets},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6552--6562}
}
GPAvatar: High-fidelity Head Avatars by Learning Efficient Gaussian Projections: Wei-Qi Feng,

Dong Han,

Ze-Kang Zhou,

Shunkai Li,

Xiaoqiang Liu,

Pengfei Wan,

Di Zhang,

Miao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Wei-Qi and Han, Dong and Zhou, Ze-Kang and Li, Shunkai and Liu, Xiaoqiang and Wan, Pengfei and Zhang, Di and Wang, Miao},
  title     = {{GPAvatar}: High-fidelity Head Avatars by Learning Efficient {Gaussian} Projections},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {250--259}
}
PIAD: Pose and Illumination agnostic Anomaly Detection: Kaichen Yang,

Junjie Cao,

Zeyu Bai,

Zhixun Su,

Andrea Tagliasacchi; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Kaichen and Cao, Junjie and Bai, Zeyu and Su, Zhixun and Tagliasacchi, Andrea},
  title     = {{PIAD}: Pose and Illumination agnostic Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4734--4743}
}
Two is Better than One: Efficient Ensemble Defense for Robust and Compact Models: Yoojin Jung,

Byung Cheol Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2025_CVPR,
  author    = {Jung, Yoojin and Song, Byung Cheol},
  title     = {Two is Better than One: Efficient Ensemble Defense for Robust and Compact Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9696--9706}
}
Tiled Diffusion: Or Madar,

Ohad Fried; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Madar_2025_CVPR,
  author    = {Madar, Or and Fried, Ohad},
  title     = {Tiled Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7795--7804}
}
Descriptor-In-Pixel : Point-Feature Tracking For Pixel Processor Arrays: Laurie Bose,

Jianing Chen,

Piotr Dudek; [pdf] [supp]
[bibtex]
@InProceedings{Bose_2025_CVPR,
  author    = {Bose, Laurie and Chen, Jianing and Dudek, Piotr},
  title     = {Descriptor-In-Pixel : Point-Feature Tracking For Pixel Processor Arrays},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5392--5400}
}
UVGS: Reimagining Unstructured 3D Gaussian Splatting using UV Mapping: Aashish Rai,

Dilin Wang,

Mihir Jain,

Nikolaos Sarafianos,

Kefan Chen,

Srinath Sridhar,

Aayush Prakash; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rai_2025_CVPR,
  author    = {Rai, Aashish and Wang, Dilin and Jain, Mihir and Sarafianos, Nikolaos and Chen, Kefan and Sridhar, Srinath and Prakash, Aayush},
  title     = {{UVGS}: Reimagining Unstructured {3D} {Gaussian} Splatting using {UV} Mapping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5927--5937}
}
InterAct: Advancing Large-Scale Versatile 3D Human-Object Interaction Generation: Sirui Xu,

Dongting Li,

Yucheng Zhang,

Xiyan Xu,

Qi Long,

Ziyin Wang,

Yunzhi Lu,

Shuchang Dong,

Hezi Jiang,

Akshat Gupta,

Yu-Xiong Wang,

Liang-Yan Gui; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Sirui and Li, Dongting and Zhang, Yucheng and Xu, Xiyan and Long, Qi and Wang, Ziyin and Lu, Yunzhi and Dong, Shuchang and Jiang, Hezi and Gupta, Akshat and Wang, Yu-Xiong and Gui, Liang-Yan},
  title     = {{InterAct}: Advancing Large-Scale Versatile {3D} Human-Object Interaction Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7048--7060}
}
TAMT: Temporal-Aware Model Tuning for Cross-Domain Few-Shot Action Recognition: Yilong Wang,

Zilin Gao,

Qilong Wang,

Zhaofeng Chen,

Peihua Li,

Qinghua Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yilong and Gao, Zilin and Wang, Qilong and Chen, Zhaofeng and Li, Peihua and Hu, Qinghua},
  title     = {{TAMT}: Temporal-Aware Model Tuning for Cross-Domain Few-Shot Action Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3449--3459}
}
BioX-CPath: Biologically-driven Explainable Diagnostics for Multistain IHC Computational Pathology: Amaya Gallagher-Syed,

Henry Senior,

Omnia Alwazzan,

Elena Pontarini,

Michele Bombardieri,

Costantino Pitzalis,

Myles J. Lewis,

Michael R. Barnes,

Luca Rossi,

Gregory Slabaugh; [pdf] [supp]
[bibtex]
@InProceedings{Gallagher-Syed_2025_CVPR,
  author    = {Gallagher-Syed, Amaya and Senior, Henry and Alwazzan, Omnia and Pontarini, Elena and Bombardieri, Michele and Pitzalis, Costantino and Lewis, Myles J. and Barnes, Michael R. and Rossi, Luca and Slabaugh, Gregory},
  title     = {{BioX-CPath}: Biologically-driven Explainable Diagnostics for Multistain {IHC} Computational Pathology},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10372--10383}
}
GauCho: Gaussian Distributions with Cholesky Decomposition for Oriented Object Detection: José Henrique Lima Marques,

Jeffri Murrugarra-Llerena,

Claudio R. Jung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Marques_2025_CVPR,
  author    = {Marques, Jos{\'e} Henrique Lima and Murrugarra-Llerena, Jeffri and Jung, Claudio R.},
  title     = {{GauCho}: {Gaussian} Distributions with {Cholesky} Decomposition for Oriented Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3593--3602}
}
No Thing, Nothing: Highlighting Safety-Critical Classes for Robust LiDAR Semantic Segmentation in Adverse Weather: Junsung Park,

Hwijeong Lee,

Inha Kang,

Hyunjung Shim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Junsung and Lee, Hwijeong and Kang, Inha and Shim, Hyunjung},
  title     = {No Thing, Nothing: Highlighting Safety-Critical Classes for Robust {LiDAR} Semantic Segmentation in Adverse Weather},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6690--6699}
}
Mind the Gap: Detecting Black-box Adversarial Attacks in the Making through Query Update Analysis: Jeonghwan Park,

Niall McLaughlin,

Ihsen Alouani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Jeonghwan and McLaughlin, Niall and Alouani, Ihsen},
  title     = {Mind the Gap: Detecting Black-box Adversarial Attacks in the Making through Query Update Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10235--10243}
}
GaussianWorld: Gaussian World Model for Streaming 3D Occupancy Prediction: Sicheng Zuo,

Wenzhao Zheng,

Yuanhui Huang,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zuo_2025_CVPR,
  author    = {Zuo, Sicheng and Zheng, Wenzhao and Huang, Yuanhui and Zhou, Jie and Lu, Jiwen},
  title     = {{GaussianWorld}: {Gaussian} World Model for Streaming {3D} Occupancy Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6772--6781}
}
ICP: Immediate Compensation Pruning for Mid-to-high Sparsity: Xin Luo,

Xueming Fu,

Zihang Jiang,

S. Kevin Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Xin and Fu, Xueming and Jiang, Zihang and Zhou, S. Kevin},
  title     = {{ICP}: Immediate Compensation Pruning for Mid-to-high Sparsity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9487--9496}
}
VinaBench: Benchmark for Faithful and Consistent Visual Narratives: Silin Gao,

Sheryl Mathew,

Li Mi,

Sepideh Mamooler,

Mengjie Zhao,

Hiromi Wakaki,

Yuki Mitsufuji,

Syrielle Montariol,

Antoine Bosselut; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Silin and Mathew, Sheryl and Mi, Li and Mamooler, Sepideh and Zhao, Mengjie and Wakaki, Hiromi and Mitsufuji, Yuki and Montariol, Syrielle and Bosselut, Antoine},
  title     = {{VinaBench}: Benchmark for Faithful and Consistent Visual Narratives},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2870--2879}
}
Dual Diffusion for Unified Image Generation and Understanding: Zijie Li,

Henry Li,

Yichun Shi,

Amir Barati Farimani,

Yuval Kluger,

Linjie Yang,

Peng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zijie and Li, Henry and Shi, Yichun and Farimani, Amir Barati and Kluger, Yuval and Yang, Linjie and Wang, Peng},
  title     = {Dual Diffusion for Unified Image Generation and Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2779--2790}
}
WeakMCN: Multi-task Collaborative Network for Weakly Supervised Referring Expression Comprehension and Segmentation: Silin Cheng,

Yang Liu,

Xinwei He,

Sebastien Ourselin,

Lei Tan,

Gen Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Silin and Liu, Yang and He, Xinwei and Ourselin, Sebastien and Tan, Lei and Luo, Gen},
  title     = {{WeakMCN}: Multi-task Collaborative Network for Weakly Supervised Referring Expression Comprehension and Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9175--9185}
}
4DGC: Rate-Aware 4D Gaussian Compression for Efficient Streamable Free-Viewpoint Video: Qiang Hu,

Zihan Zheng,

Houqiang Zhong,

Sihua Fu,

Li Song,

Xiaoyun Zhang,

Guangtao Zhai,

Yanfeng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Qiang and Zheng, Zihan and Zhong, Houqiang and Fu, Sihua and Song, Li and Zhang, Xiaoyun and Zhai, Guangtao and Wang, Yanfeng},
  title     = {{4DGC}: Rate-Aware {4D} {Gaussian} Compression for Efficient Streamable Free-Viewpoint Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {875--885}
}
GASP: Gaussian Avatars with Synthetic Priors: Jack Saunders,

Charlie Hewitt,

Yanan Jian,

Marek Kowalski,

Tadas Baltrusaitis,

Yiye Chen,

Darren Cosker,

Virginia Estellers,

Nicholas Gydé,

Vinay P. Namboodiri,

Benjamin E. Lundell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saunders_2025_CVPR,
  author    = {Saunders, Jack and Hewitt, Charlie and Jian, Yanan and Kowalski, Marek and Baltrusaitis, Tadas and Chen, Yiye and Cosker, Darren and Estellers, Virginia and Gyd{\'e}, Nicholas and Namboodiri, Vinay P. and Lundell, Benjamin E.},
  title     = {{GASP}: {Gaussian} Avatars with Synthetic Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {271--280}
}
COSMIC: Clique-Oriented Semantic Multi-space Integration for Robust CLIP Test-Time Adaptation: Fanding Huang,

Jingyan Jiang,

Qinting Jiang,

Hebei Li,

Faisal Nadeem Khan,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Fanding and Jiang, Jingyan and Jiang, Qinting and Li, Hebei and Khan, Faisal Nadeem and Wang, Zhi},
  title     = {{COSMIC}: Clique-Oriented Semantic Multi-space Integration for Robust {CLIP} Test-Time Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9772--9781}
}
High-quality Point Cloud Oriented Normal Estimation via Hybrid Angular and Euclidean Distance Encoding: Yuanqi Li,

Jingcheng Huang,

Hongshen Wang,

Peiyuan Lv,

Yansong Liu,

Jiuming Zheng,

Jie Guo,

Yanwen Guo; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yuanqi and Huang, Jingcheng and Wang, Hongshen and Lv, Peiyuan and Liu, Yansong and Zheng, Jiuming and Guo, Jie and Guo, Yanwen},
  title     = {High-quality Point Cloud Oriented Normal Estimation via Hybrid Angular and {Euclidean} Distance Encoding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1287--1296}
}
Prior-free 3D Object Tracking: Xiuqiang Song,

Li Jin,

Zhengxian Zhang,

Jiachen Li,

Fan Zhong,

Guofeng Zhang,

Xueying Qin; [pdf] [supp]
[bibtex]
@InProceedings{Song_2025_CVPR,
  author    = {Song, Xiuqiang and Jin, Li and Zhang, Zhengxian and Li, Jiachen and Zhong, Fan and Zhang, Guofeng and Qin, Xueying},
  title     = {Prior-free {3D} Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1200--1209}
}
Progressive Correspondence Regenerator for Robust 3D Registration: Guiyu Zhao,

Sheng Ao,

Ye Zhang,

Kai Xu,

Yulan Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Guiyu and Ao, Sheng and Zhang, Ye and Xu, Kai and Guo, Yulan},
  title     = {Progressive Correspondence Regenerator for Robust {3D} Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1210--1219}
}
Cross-Modal 3D Representation with Multi-View Images and Point Clouds: Ziyang Zhou,

Pinghui Wang,

Zi Liang,

Haitao Bai,

Ruofei Zhang; [pdf]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Ziyang and Wang, Pinghui and Liang, Zi and Bai, Haitao and Zhang, Ruofei},
  title     = {Cross-Modal {3D} Representation with Multi-View Images and Point Clouds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3728--3739}
}
Decompositional Neural Scene Reconstruction with Generative Diffusion Prior: Junfeng Ni,

Yu Liu,

Ruijie Lu,

Zirui Zhou,

Song-Chun Zhu,

Yixin Chen,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2025_CVPR,
  author    = {Ni, Junfeng and Liu, Yu and Lu, Ruijie and Zhou, Zirui and Zhu, Song-Chun and Chen, Yixin and Huang, Siyuan},
  title     = {Decompositional Neural Scene Reconstruction with Generative Diffusion Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6022--6033}
}
Learning Visual Generative Priors without Text: Shuailei Ma,

Kecheng Zheng,

Ying Wei,

Wei Wu,

Fan Lu,

Yifei Zhang,

Chen-Wei Xie,

Biao Gong,

Jiapeng Zhu,

Yujun Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Shuailei and Zheng, Kecheng and Wei, Ying and Wu, Wei and Lu, Fan and Zhang, Yifei and Xie, Chen-Wei and Gong, Biao and Zhu, Jiapeng and Shen, Yujun},
  title     = {Learning Visual Generative Priors without Text},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8051--8061}
}
Lifting the Veil on Visual Information Flow in MLLMs: Unlocking Pathways to Faster Inference: Hao Yin,

Guangzong Si,

Zilei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Hao and Si, Guangzong and Wang, Zilei},
  title     = {Lifting the Veil on Visual Information Flow in {MLLMs}: Unlocking Pathways to Faster Inference},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9382--9391}
}
Pixel-level and Semantic-level Adjustable Super-resolution: A Dual-LoRA Approach: Lingchen Sun,

Rongyuan Wu,

Zhiyuan Ma,

Shuaizheng Liu,

Qiaosi Yi,

Lei Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Lingchen and Wu, Rongyuan and Ma, Zhiyuan and Liu, Shuaizheng and Yi, Qiaosi and Zhang, Lei},
  title     = {Pixel-level and Semantic-level Adjustable Super-resolution: A {Dual-LoRA} Approach},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2333--2343}
}
Mr. DETR: Instructive Multi-Route Training for Detection Transformers: Chang-Bin Zhang,

Yujie Zhong,

Kai Han; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Chang-Bin and Zhong, Yujie and Han, Kai},
    title     = {Mr. {DETR}: Instructive Multi-Route Training for Detection Transformers},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9933--9943}
}
Hearing Hands: Generating Sounds from Physical Interactions in 3D Scenes: Yiming Dou,

Wonseok Oh,

Yuqing Luo,

Antonio Loquercio,

Andrew Owens; [pdf] [arXiv]
[bibtex]
@InProceedings{Dou_2025_CVPR,
    author    = {Dou, Yiming and Oh, Wonseok and Luo, Yuqing and Loquercio, Antonio and Owens, Andrew},
    title     = {Hearing Hands: Generating Sounds from Physical Interactions in {3D} Scenes},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1795--1804}
}
AirRoom: Objects Matter in Room Reidentification: Runmao Yao,

Yi Du,

Zhuoqun Chen,

Haoze Zheng,

Chen Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2025_CVPR,
    author    = {Yao, Runmao and Du, Yi and Chen, Zhuoqun and Zheng, Haoze and Wang, Chen},
    title     = {{AirRoom}: Objects Matter in Room Reidentification},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1385--1394}
}
DefMamba: Deformable Visual State Space Model: Leiye Liu,

Miao Zhang,

Jihao Yin,

Tingwei Liu,

Wei Ji,

Yongri Piao,

Huchuan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Leiye and Zhang, Miao and Yin, Jihao and Liu, Tingwei and Ji, Wei and Piao, Yongri and Lu, Huchuan},
    title     = {{DefMamba}: Deformable Visual State Space Model},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8838--8847}
}
HOP: Heterogeneous Topology-based Multimodal Entanglement for Co-Speech Gesture Generation: Hongye Cheng,

Tianyu Wang,

Guangsi Shi,

Zexing Zhao,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
    author    = {Cheng, Hongye and Wang, Tianyu and Shi, Guangsi and Zhao, Zexing and Fu, Yanwei},
    title     = {{HOP}: Heterogeneous Topology-based Multimodal Entanglement for Co-Speech Gesture Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {906--916}
}
VoxelSplat: Dynamic Gaussian Splatting as an Effective Loss for Occupancy and Flow Prediction: Ziyue Zhu,

Shenlong Wang,

Jin Xie,

Jiang-jiang Liu,

Jingdong Wang,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
    author    = {Zhu, Ziyue and Wang, Shenlong and Xie, Jin and Liu, Jiang-jiang and Wang, Jingdong and Yang, Jian},
    title     = {{VoxelSplat}: Dynamic {Gaussian} Splatting as an Effective Loss for Occupancy and Flow Prediction},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6761--6771}
}
ControlFace: Harnessing Facial Parametric Control for Face Rigging: Wooseok Jang,

Youngjun Hong,

Geonho Cha,

Seungryong Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2025_CVPR,
    author    = {Jang, Wooseok and Hong, Youngjun and Cha, Geonho and Kim, Seungryong},
    title     = {{ControlFace}: Harnessing Facial Parametric Control for Face Rigging},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5614--5624}
}
Imputation-free and Alignment-free: Incomplete Multi-view Clustering Driven by Consensus Semantic Learning: Yuzhuo Dai,

Jiaqi Jin,

Zhibin Dong,

Siwei Wang,

Xinwang Liu,

En Zhu,

Xihong Yang,

Xinbiao Gan,

Yu Feng; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2025_CVPR,
    author    = {Dai, Yuzhuo and Jin, Jiaqi and Dong, Zhibin and Wang, Siwei and Liu, Xinwang and Zhu, En and Yang, Xihong and Gan, Xinbiao and Feng, Yu},
    title     = {Imputation-free and Alignment-free: Incomplete Multi-view Clustering Driven by Consensus Semantic Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5071--5081}
}
Sensitivity-Aware Efficient Fine-Tuning via Compact Dynamic-Rank Adaptation: Tianran Chen,

Jiarui Chen,

Baoquan Zhang,

Zhehao Yu,

Shidong Chen,

Rui Ye,

Xutao Li,

Yunming Ye; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Tianran and Chen, Jiarui and Zhang, Baoquan and Yu, Zhehao and Chen, Shidong and Ye, Rui and Li, Xutao and Ye, Yunming},
    title     = {Sensitivity-Aware Efficient Fine-Tuning via Compact Dynamic-Rank Adaptation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9655--9664}
}
A Selective Re-learning Mechanism for Hyperspectral Fusion Imaging: Yuanye Liu,

Jinyang Liu,

Renwei Dian,

Shutao Li; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Yuanye and Liu, Jinyang and Dian, Renwei and Li, Shutao},
    title     = {A Selective Re-learning Mechanism for Hyperspectral Fusion Imaging},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {7437--7446}
}
Autoregressive Sequential Pretraining for Visual Tracking: Shiyi Liang,

Yifan Bai,

Yihong Gong,

Xing Wei; [pdf]
[bibtex]
@InProceedings{Liang_2025_CVPR,
    author    = {Liang, Shiyi and Bai, Yifan and Gong, Yihong and Wei, Xing},
    title     = {Autoregressive Sequential Pretraining for Visual Tracking},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {7254--7264}
}
PromptHMR: Promptable Human Mesh Recovery: Yufu Wang,

Yu Sun,

Priyanka Patel,

Kostas Daniilidis,

Michael J. Black,

Muhammed Kocabas; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Yufu and Sun, Yu and Patel, Priyanka and Daniilidis, Kostas and Black, Michael J. and Kocabas, Muhammed},
    title     = {{PromptHMR}: Promptable Human Mesh Recovery},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1148--1159}
}
VISTREAM: Improving Computation Efficiency of Visual Streaming Perception via Law-of-Charge-Conservation Inspired Spiking Neural Network: Kang You,

Ziling Wei,

Jing Yan,

Boning Zhang,

Qinghai Guo,

Yaoyu Zhang,

Zhezhi He; [pdf] [supp]
[bibtex]
@InProceedings{You_2025_CVPR,
    author    = {You, Kang and Wei, Ziling and Yan, Jing and Zhang, Boning and Guo, Qinghai and Zhang, Yaoyu and He, Zhezhi},
    title     = {{VISTREAM}: Improving Computation Efficiency of Visual Streaming Perception via Law-of-Charge-Conservation Inspired Spiking Neural Network},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8796--8805}
}
STPro: Spatial and Temporal Progressive Learning for Weakly Supervised Spatio-Temporal Grounding: Aaryan Garg,

Akash Kumar,

Yogesh S Rawat; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Garg_2025_CVPR,
    author    = {Garg, Aaryan and Kumar, Akash and Rawat, Yogesh S},
    title     = {{STPro}: Spatial and Temporal Progressive Learning for Weakly Supervised Spatio-Temporal Grounding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3384--3394}
}
Rashomon Sets for Prototypical-Part Networks: Editing Interpretable Models in Real-Time: Jon Donnelly,

Zhicheng Guo,

Alina Jade Barnett,

Hayden McTavish,

Chaofan Chen,

Cynthia Rudin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Donnelly_2025_CVPR,
    author    = {Donnelly, Jon and Guo, Zhicheng and Barnett, Alina Jade and McTavish, Hayden and Chen, Chaofan and Rudin, Cynthia},
    title     = {{Rashomon} Sets for Prototypical-Part Networks: Editing Interpretable Models in Real-Time},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {4528--4538}
}
EnvPoser: Environment-aware Realistic Human Motion Estimation from Sparse Observations with Uncertainty Modeling: Songpengcheng Xia,

Yu Zhang,

Zhuo Su,

Xiaozheng Zheng,

Zheng Lv,

Guidong Wang,

Yongjie Zhang,

Qi Wu,

Lei Chu,

Ling Pei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_CVPR,
    author    = {Xia, Songpengcheng and Zhang, Yu and Su, Zhuo and Zheng, Xiaozheng and Lv, Zheng and Wang, Guidong and Zhang, Yongjie and Wu, Qi and Chu, Lei and Pei, Ling},
    title     = {{EnvPoser}: Environment-aware Realistic Human Motion Estimation from Sparse Observations with Uncertainty Modeling},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1839--1849}
}
Tuning the Frequencies: Robust Training for Sinusoidal Neural Networks: Tiago Novello,

Diana Aldana,

Andre Araujo,

Luiz Velho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Novello_2025_CVPR,
    author    = {Novello, Tiago and Aldana, Diana and Araujo, Andre and Velho, Luiz},
    title     = {Tuning the Frequencies: Robust Training for Sinusoidal Neural Networks},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3071--3080}
}
Real-time Free-view Human Rendering from Sparse-view RGB Videos using Double Unprojected Textures: Guoxing Sun,

Rishabh Dabral,

Heming Zhu,

Pascal Fua,

Christian Theobalt,

Marc Habermann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
    author    = {Sun, Guoxing and Dabral, Rishabh and Zhu, Heming and Fua, Pascal and Theobalt, Christian and Habermann, Marc},
    title     = {Real-time Free-view Human Rendering from Sparse-view {RGB} Videos using Double Unprojected Textures},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {562--573}
}
Large Self-Supervised Models Bridge the Gap in Domain Adaptive Object Detection: Marc-Antoine Lavoie,

Anas Mahmoud,

Steven L. Waslander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lavoie_2025_CVPR,
    author    = {Lavoie, Marc-Antoine and Mahmoud, Anas and Waslander, Steven L.},
    title     = {Large Self-Supervised Models Bridge the Gap in Domain Adaptive Object Detection},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {4692--4702}
}
Evaluating Model Perception of Color Illusions in Photorealistic Scenes: Lingjun Mao,

Zineng Tang,

Alane Suhr; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2025_CVPR,
    author    = {Mao, Lingjun and Tang, Zineng and Suhr, Alane},
    title     = {Evaluating Model Perception of Color Illusions in Photorealistic Scenes},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {7805--7814}
}
Do Visual Imaginations Improve Vision-and-Language Navigation Agents?: Akhil Perincherry,

Jacob Krantz,

Stefan Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Perincherry_2025_CVPR,
    author    = {Perincherry, Akhil and Krantz, Jacob and Lee, Stefan},
    title     = {Do Visual Imaginations Improve Vision-and-Language Navigation Agents?},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3846--3855}
}
HotSpot: Signed Distance Function Optimization with an Asymptotically Sufficient Condition: Zimo Wang,

Cheng Wang,

Taiki Yoshino,

Sirui Tao,

Ziyang Fu,

Tzu-Mao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Zimo and Wang, Cheng and Yoshino, Taiki and Tao, Sirui and Fu, Ziyang and Li, Tzu-Mao},
    title     = {{HotSpot}: Signed Distance Function Optimization with an Asymptotically Sufficient Condition},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1276--1286}
}
Libra-Merging: Importance-redundancy and Pruning-merging Trade-off for Acceleration Plug-in in Large Vision-Language Model: Longrong Yang,

Dong Shen,

Chaoxiang Cai,

Kaibing Chen,

Fan Yang,

Tingting Gao,

Di Zhang,

Xi Li; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
    author    = {Yang, Longrong and Shen, Dong and Cai, Chaoxiang and Chen, Kaibing and Yang, Fan and Gao, Tingting and Zhang, Di and Li, Xi},
    title     = {{Libra-Merging}: Importance-redundancy and Pruning-merging Trade-off for Acceleration Plug-in in Large Vision-Language Model},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9402--9412}
}
LEDiff: Latent Exposure Diffusion for HDR Generation: Chao Wang,

Zhihao Xia,

Thomas Leimkuhler,

Karol Myszkowski,

Xuaner Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Chao and Xia, Zhihao and Leimkuhler, Thomas and Myszkowski, Karol and Zhang, Xuaner},
    title     = {{LEDiff}: Latent Exposure Diffusion for {HDR} Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {453--464}
}
VideoAutoArena: An Automated Arena for Evaluating Large Multimodal Models in Video Analysis through User Simulation: Ziyang Luo,

Haoning Wu,

Dongxu Li,

Jing Ma,

Mohan Kankanhalli,

Junnan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_CVPR,
    author    = {Luo, Ziyang and Wu, Haoning and Li, Dongxu and Ma, Jing and Kankanhalli, Mohan and Li, Junnan},
    title     = {{VideoAutoArena}: An Automated Arena for Evaluating Large Multimodal Models in Video Analysis through User Simulation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8461--8474}
}
Zero-Shot Novel View and Depth Synthesis with Multi-View Geometric Diffusion: Vitor Guizilini,

Muhammad Zubair Irshad,

Dian Chen,

Greg Shakhnarovich,

Rares Ambrus; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guizilini_2025_CVPR,
    author    = {Guizilini, Vitor and Irshad, Muhammad Zubair and Chen, Dian and Shakhnarovich, Greg and Ambrus, Rares},
    title     = {Zero-Shot Novel View and Depth Synthesis with Multi-View Geometric Diffusion},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {764--776}
}
Unveiling Visual Perception in Language Models: An Attention Head Analysis Approach: Jing Bi,

Junjia Guo,

Yunlong Tang,

Lianggong Bruce Wen,

Zhang Liu,

Bingjie Wang,

Chenliang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bi_2025_CVPR,
    author    = {Bi, Jing and Guo, Junjia and Tang, Yunlong and Wen, Lianggong Bruce and Liu, Zhang and Wang, Bingjie and Xu, Chenliang},
    title     = {Unveiling Visual Perception in Language Models: An Attention Head Analysis Approach},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {4135--4144}
}
SemiDAViL: Semi-supervised Domain Adaptation with Vision-Language Guidance for Semantic Segmentation: Hritam Basak,

Zhaozheng Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Basak_2025_CVPR,
    author    = {Basak, Hritam and Yin, Zhaozheng},
    title     = {{SemiDAViL}: Semi-supervised Domain Adaptation with Vision-Language Guidance for Semantic Segmentation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9816--9828}
}
dFLMoE: Decentralized Federated Learning via Mixture of Experts for Medical Data Analysis: Luyuan Xie,

Tianyu Luan,

Wenyuan Cai,

Guochen Yan,

Zhaoyu Chen,

Nan Xi,

Yuejian Fang,

Qingni Shen,

Zhonghai Wu,

Junsong Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
    author    = {Xie, Luyuan and Luan, Tianyu and Cai, Wenyuan and Yan, Guochen and Chen, Zhaoyu and Xi, Nan and Fang, Yuejian and Shen, Qingni and Wu, Zhonghai and Yuan, Junsong},
    title     = {{dFLMoE}: Decentralized Federated Learning via Mixture of Experts for Medical Data Analysis},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {10203--10213}
}
Reconstructing Humans with a Biomechanically Accurate Skeleton: Yan Xia,

Xiaowei Zhou,

Etienne Vouga,

Qixing Huang,

Georgios Pavlakos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_CVPR,
    author    = {Xia, Yan and Zhou, Xiaowei and Vouga, Etienne and Huang, Qixing and Pavlakos, Georgios},
    title     = {Reconstructing Humans with a Biomechanically Accurate Skeleton},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5355--5365}
}
AdaCM^2: On Understanding Extremely Long-Term Video with Adaptive Cross-Modality Memory Reduction: Yuanbin Man,

Ying Huang,

Chengming Zhang,

Bingzhe Li,

Wei Niu,

Miao Yin; [pdf]
[bibtex]
@InProceedings{Man_2025_CVPR,
    author    = {Man, Yuanbin and Huang, Ying and Zhang, Chengming and Li, Bingzhe and Niu, Wei and Yin, Miao},
    title     = {{AdaCM$^2$}: On Understanding Extremely Long-Term Video with Adaptive Cross-Modality Memory Reduction},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8534--8544}
}
VGGT: Visual Geometry Grounded Transformer: Jianyuan Wang,

Minghao Chen,

Nikita Karaev,

Andrea Vedaldi,

Christian Rupprecht,

David Novotny; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Jianyuan and Chen, Minghao and Karaev, Nikita and Vedaldi, Andrea and Rupprecht, Christian and Novotny, David},
    title     = {{VGGT}: Visual Geometry Grounded Transformer},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5294--5306}
}
Silent Branding Attack: Trigger-free Data Poisoning Attack on Text-to-Image Diffusion Models: Sangwon Jang,

June Suk Choi,

Jaehyeong Jo,

Kimin Lee,

Sung Ju Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2025_CVPR,
    author    = {Jang, Sangwon and Choi, June Suk and Jo, Jaehyeong and Lee, Kimin and Hwang, Sung Ju},
    title     = {Silent Branding Attack: Trigger-free Data Poisoning Attack on Text-to-Image Diffusion Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8203--8212}
}
Visual Consensus Prompting for Co-Salient Object Detection: Jie Wang,

Nana Yu,

Zihao Zhang,

Yahong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Jie and Yu, Nana and Zhang, Zihao and Han, Yahong},
    title     = {Visual Consensus Prompting for Co-Salient Object Detection},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9591--9600}
}
Quantization without Tears: Minghao Fu,

Hao Yu,

Jie Shao,

Junjie Zhou,

Ke Zhu,

Jianxin Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_CVPR,
    author    = {Fu, Minghao and Yu, Hao and Shao, Jie and Zhou, Junjie and Zhu, Ke and Wu, Jianxin},
    title     = {Quantization without Tears},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {4462--4472}
}
PHGC: Procedural Heterogeneous Graph Completion for Natural Language Task Verification in Egocentric Videos: Xun Jiang,

Zhiyi Huang,

Xing Xu,

Jingkuan Song,

Fumin Shen,

Heng Tao Shen; [pdf]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
    author    = {Jiang, Xun and Huang, Zhiyi and Xu, Xing and Song, Jingkuan and Shen, Fumin and Shen, Heng Tao},
    title     = {{PHGC}: Procedural Heterogeneous Graph Completion for Natural Language Task Verification in Egocentric Videos},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8615--8624}
}
Towards Consistent Multi-Task Learning: Unlocking the Potential of Task-Specific Parameters: Xiaohan Qin,

Xiaoxing Wang,

Junchi Yan; [pdf] [supp]
[bibtex]
@InProceedings{Qin_2025_CVPR,
    author    = {Qin, Xiaohan and Wang, Xiaoxing and Yan, Junchi},
    title     = {Towards Consistent Multi-Task Learning: Unlocking the Potential of Task-Specific Parameters},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {10067--10076}
}
SceneFactor: Factored Latent 3D Diffusion for Controllable 3D Scene Generation: Aleksey Bokhovkin,

Quan Meng,

Shubham Tulsiani,

Angela Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bokhovkin_2025_CVPR,
    author    = {Bokhovkin, Aleksey and Meng, Quan and Tulsiani, Shubham and Dai, Angela},
    title     = {{SceneFactor}: Factored Latent {3D} Diffusion for Controllable {3D} Scene Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {628--639}
}
HiFi-Portrait: Zero-shot Identity-preserved Portrait Generation with High-fidelity Multi-face Fusion: Yifang Xu,

Benxiang Zhai,

Yunzhuo Sun,

Ming Li,

Yang Li,

Sidan Du; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
    author    = {Xu, Yifang and Zhai, Benxiang and Sun, Yunzhuo and Li, Ming and Li, Yang and Du, Sidan},
    title     = {{HiFi-Portrait}: Zero-shot Identity-preserved Portrait Generation with High-fidelity Multi-face Fusion},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5625--5635}
}
FloVD: Optical Flow Meets Video Diffusion Model for Enhanced Camera-Controlled Video Synthesis: Wonjoon Jin,

Qi Dai,

Chong Luo,

Seung-Hwan Baek,

Sunghyun Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2025_CVPR,
    author    = {Jin, Wonjoon and Dai, Qi and Luo, Chong and Baek, Seung-Hwan and Cho, Sunghyun},
    title     = {{FloVD}: Optical Flow Meets Video Diffusion Model for Enhanced Camera-Controlled Video Synthesis},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2040--2049}
}
RAD: Region-Aware Diffusion Models for Image Inpainting: Sora Kim,

Sungho Suh,

Minsik Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
    author    = {Kim, Sora and Suh, Sungho and Lee, Minsik},
    title     = {{RAD}: Region-Aware Diffusion Models for Image Inpainting},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2439--2448}
}
Understanding Fine-tuning CLIP for Open-vocabulary Semantic Segmentation in Hyperbolic Space: Zelin Peng,

Zhengqin Xu,

Zhilin Zeng,

Changsong Wen,

Yu Huang,

Menglin Yang,

Feilong Tang,

Wei Shen; [pdf] [supp]
[bibtex]
@InProceedings{Peng_2025_CVPR,
    author    = {Peng, Zelin and Xu, Zhengqin and Zeng, Zhilin and Wen, Changsong and Huang, Yu and Yang, Menglin and Tang, Feilong and Shen, Wei},
    title     = {Understanding Fine-tuning {CLIP} for Open-vocabulary Semantic Segmentation in Hyperbolic Space},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {4562--4572}
}
TexGaussian: Generating High-quality PBR Material via Octree-based 3D Gaussian Splatting: Bojun Xiong,

Jialun Liu,

Jiakui Hu,

Chenming Wu,

Jinbo Wu,

Xing Liu,

Chen Zhao,

Errui Ding,

Zhouhui Lian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiong_2025_CVPR,
    author    = {Xiong, Bojun and Liu, Jialun and Hu, Jiakui and Wu, Chenming and Wu, Jinbo and Liu, Xing and Zhao, Chen and Ding, Errui and Lian, Zhouhui},
    title     = {{TexGaussian}: Generating High-quality {PBR} Material via Octree-based {3D} {Gaussian} Splatting},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {551--561}
}
Concept Replacer: Replacing Sensitive Concepts in Diffusion Models via Precision Localization: Lingyun Zhang,

Yu Xie,

Yanwei Fu,

Ping Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Lingyun and Xie, Yu and Fu, Yanwei and Chen, Ping},
    title     = {{Concept Replacer}: Replacing Sensitive Concepts in Diffusion Models via Precision Localization},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8172--8181}
}
A Regularization-Guided Equivariant Approach for Image Restoration: Yulu Bai,

Jiahong Fu,

Qi Xie,

Deyu Meng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2025_CVPR,
    author    = {Bai, Yulu and Fu, Jiahong and Xie, Qi and Meng, Deyu},
    title     = {A Regularization-Guided Equivariant Approach for Image Restoration},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2300--2310}
}
Deep Fair Multi-View Clustering with Attention KAN: HaiMing Xu,

Qianqian Wang,

Boyue Wang,

Quanxue Gao; [pdf]
[bibtex]
@InProceedings{Xu_2025_CVPR,
    author    = {Xu, HaiMing and Wang, Qianqian and Wang, Boyue and Gao, Quanxue},
    title     = {Deep Fair Multi-View Clustering with Attention {KAN}},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5061--5070}
}
LineArt: A Knowledge-guided Training-free High-quality Appearance Transfer for Design Drawing with Diffusion Model: Xi Wang,

Hongzhen Li,

Heng Fang,

Yichen Peng,

Haoran Xie,

Xi Yang,

Chuntao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Xi and Li, Hongzhen and Fang, Heng and Peng, Yichen and Xie, Haoran and Yang, Xi and Li, Chuntao},
    title     = {{LineArt}: A Knowledge-guided Training-free High-quality Appearance Transfer for Design Drawing with Diffusion Model},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2912--2923}
}
VideoICL: Confidence-based Iterative In-context Learning for Out-of-Distribution Video Understanding: Kangsan Kim,

Geon Park,

Youngwan Lee,

Woongyeong Yeo,

Sung Ju Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
    author    = {Kim, Kangsan and Park, Geon and Lee, Youngwan and Yeo, Woongyeong and Hwang, Sung Ju},
    title     = {{VideoICL}: Confidence-based Iterative In-context Learning for Out-of-Distribution Video Understanding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3295--3305}
}
Zero-Shot Image Restoration Using Few-Step Guidance of Consistency Models (and Beyond): Tomer Garber,

Tom Tirer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Garber_2025_CVPR,
    author    = {Garber, Tomer and Tirer, Tom},
    title     = {Zero-Shot Image Restoration Using Few-Step Guidance of Consistency Models (and Beyond)},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2398--2407}
}
Similarity-Guided Layer-Adaptive Vision Transformer for UAV Tracking: Chaocan Xue,

Bineng Zhong,

Qihua Liang,

Yaozong Zheng,

Ning Li,

Yuanliang Xue,

Shuxiang Song; [pdf] [arXiv]
[bibtex]
@InProceedings{Xue_2025_CVPR,
    author    = {Xue, Chaocan and Zhong, Bineng and Liang, Qihua and Zheng, Yaozong and Li, Ning and Xue, Yuanliang and Song, Shuxiang},
    title     = {Similarity-Guided Layer-Adaptive Vision Transformer for {UAV} Tracking},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6730--6740}
}
LidarGait++: Learning Local Features and Size Awareness from LiDAR Point Clouds for 3D Gait Recognition: Chuanfu Shen,

Rui Wang,

Lixin Duan,

Shiqi Yu; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_CVPR,
    author    = {Shen, Chuanfu and Wang, Rui and Duan, Lixin and Yu, Shiqi},
    title     = {{LidarGait++}: Learning Local Features and Size Awareness from {LiDAR} Point Clouds for {3D} Gait Recognition},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6627--6636}
}
Coarse Correspondences Boost Spatial-Temporal Reasoning in Multimodal Language Model: Benlin Liu,

Yuhao Dong,

Yiqin Wang,

Zixian Ma,

Yansong Tang,

Luming Tang,

Yongming Rao,

Wei-Chiu Ma,

Ranjay Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Benlin and Dong, Yuhao and Wang, Yiqin and Ma, Zixian and Tang, Yansong and Tang, Luming and Rao, Yongming and Ma, Wei-Chiu and Krishna, Ranjay},
    title     = {Coarse Correspondences Boost Spatial-Temporal Reasoning in Multimodal Language Model},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3783--3792}
}
FoundationStereo: Zero-Shot Stereo Matching: Bowen Wen,

Matthew Trepte,

Joseph Aribido,

Jan Kautz,

Orazio Gallo,

Stan Birchfield; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2025_CVPR,
    author    = {Wen, Bowen and Trepte, Matthew and Aribido, Joseph and Kautz, Jan and Gallo, Orazio and Birchfield, Stan},
    title     = {{FoundationStereo}: Zero-Shot Stereo Matching},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5249--5260}
}
UniNet: A Contrastive Learning-guided Unified Framework with Feature Selection for Anomaly Detection: Shun Wei,

Jielin Jiang,

Xiaolong Xu; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_CVPR,
    author    = {Wei, Shun and Jiang, Jielin and Xu, Xiaolong},
    title     = {{UniNet}: A Contrastive Learning-guided Unified Framework with Feature Selection for Anomaly Detection},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9994--10003}
}
MoEdit: On Learning Quantity Perception for Multi-object Image Editing: Yanfeng Li,

Kahou Chan,

Yue Sun,

Chantong Lam,

Tong Tong,

Zitong Yu,

Keren Fu,

Xiaohong Liu,

Tao Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Yanfeng and Chan, Kahou and Sun, Yue and Lam, Chantong and Tong, Tong and Yu, Zitong and Fu, Keren and Liu, Xiaohong and Tan, Tao},
    title     = {{MoEdit}: On Learning Quantity Perception for Multi-object Image Editing},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2683--2693}
}
Seeing More with Less: Human-like Representations in Vision Models: Andrey Gizdov,

Shimon Ullman,

Daniel Harari; [pdf] [supp]
[bibtex]
@InProceedings{Gizdov_2025_CVPR,
    author    = {Gizdov, Andrey and Ullman, Shimon and Harari, Daniel},
    title     = {Seeing More with Less: Human-like Representations in Vision Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {4408--4417}
}
Modeling Thousands of Human Annotators for Generalizable Text-to-Image Person Re-identification: Jiayu Jiang,

Changxing Ding,

Wentao Tan,

Junhong Wang,

Jin Tao,

Xiangmin Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
    author    = {Jiang, Jiayu and Ding, Changxing and Tan, Wentao and Wang, Junhong and Tao, Jin and Xu, Xiangmin},
    title     = {Modeling Thousands of Human Annotators for Generalizable Text-to-Image Person Re-identification},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9220--9230}
}
AeroGen: Enhancing Remote Sensing Object Detection with Diffusion-Driven Data Generation: Datao Tang,

Xiangyong Cao,

Xuan Wu,

Jialin Li,

Jing Yao,

Xueru Bai,

Dongsheng Jiang,

Yin Li,

Deyu Meng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
    author    = {Tang, Datao and Cao, Xiangyong and Wu, Xuan and Li, Jialin and Yao, Jing and Bai, Xueru and Jiang, Dongsheng and Li, Yin and Meng, Deyu},
    title     = {{AeroGen}: Enhancing Remote Sensing Object Detection with Diffusion-Driven Data Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3614--3624},
}
Tra-MoE: Learning Trajectory Prediction Model from Multiple Domains for Adaptive Policy Conditioning: Jiange Yang,

Haoyi Zhu,

Yating Wang,

Gangshan Wu,

Tong He,

Limin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
    author    = {Yang, Jiange and Zhu, Haoyi and Wang, Yating and Wu, Gangshan and He, Tong and Wang, Limin},
    title     = {{Tra-MoE}: Learning Trajectory Prediction Model from Multiple Domains for Adaptive Policy Conditioning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6960--6970},
}
Style Quantization for Data-Efficient GAN Training: Jian Wang,

Xin Lan,

Jizhe Zhou,

Yuxin Tian,

Jiancheng Lv; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Jian and Lan, Xin and Zhou, Jizhe and Tian, Yuxin and Lv, Jiancheng},
    title     = {Style Quantization for Data-Efficient {GAN} Training},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {7696--7706},
}
Localizing Events in Videos with Multimodal Queries: Gengyuan Zhang,

Mang Ling Ada Fok,

Jialu Ma,

Yan Xia,

Daniel Cremers,

Philip Torr,

Volker Tresp,

Jindong Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Gengyuan and Fok, Mang Ling Ada and Ma, Jialu and Xia, Yan and Cremers, Daniel and Torr, Philip and Tresp, Volker and Gu, Jindong},
    title     = {Localizing Events in Videos with Multimodal Queries},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3339--3351},
}
PhysVLM: Enabling Visual Language Models to Understand Robotic Physical Reachability: Weijie Zhou,

Manli Tao,

Chaoyang Zhao,

Haiyun Guo,

Honghui Dong,

Ming Tang,

Jinqiao Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
    author    = {Zhou, Weijie and Tao, Manli and Zhao, Chaoyang and Guo, Haiyun and Dong, Honghui and Tang, Ming and Wang, Jinqiao},
    title     = {{PhysVLM}: Enabling Visual Language Models to Understand Robotic Physical Reachability},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6940--6949},
}
CleanDIFT: Diffusion Features without Noise: Nick Stracke,

Stefan Andreas Baumann,

Kolja Bauer,

Frank Fundel,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Stracke_2025_CVPR,
    author    = {Stracke, Nick and Baumann, Stefan Andreas and Bauer, Kolja and Fundel, Frank and Ommer, Bj{\"o}rn},
    title     = {{CleanDIFT}: Diffusion Features without Noise},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {117--127},
}
MAD: Memory-Augmented Detection of 3D Objects: Ben Agro,

Sergio Casas,

Patrick Wang,

Thomas Gilles,

Raquel Urtasun; [pdf] [supp]
[bibtex]
@InProceedings{Agro_2025_CVPR,
    author    = {Agro, Ben and Casas, Sergio and Wang, Patrick and Gilles, Thomas and Urtasun, Raquel},
    title     = {{MAD}: Memory-Augmented Detection of {3D} Objects},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1449--1460},
}
Doppelgangers and Adversarial Vulnerability: George Kamberov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kamberov_2025_CVPR,
    author    = {Kamberov, George},
    title     = {Doppelgangers and Adversarial Vulnerability},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {10244--10254},
}
Precise Event Spotting in Sports Videos: Solving Long-Range Dependency and Class Imbalance: Sanchayan Santra,

Vishal Chudasama,

Pankaj Wasnik,

Vineeth N Balasubramanian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Santra_2025_CVPR,
    author    = {Santra, Sanchayan and Chudasama, Vishal and Wasnik, Pankaj and Balasubramanian, Vineeth N},
    title     = {Precise Event Spotting in Sports Videos: Solving Long-Range Dependency and Class Imbalance},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3163--3172},
}
Steady Progress Beats Stagnation: Mutual Aid of Foundation and Conventional Models in Mixed Domain Semi-Supervised Medical Image Segmentation: Qinghe Ma,

Jian Zhang,

Zekun Li,

Lei Qi,

Qian Yu,

Yinghuan Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
    author    = {Ma, Qinghe and Zhang, Jian and Li, Zekun and Qi, Lei and Yu, Qian and Shi, Yinghuan},
    title     = {Steady Progress Beats Stagnation: Mutual Aid of Foundation and Conventional Models in Mixed Domain Semi-Supervised Medical Image Segmentation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5175--5185},
}
ARKit LabelMaker: A New Scale for Indoor 3D Scene Understanding: Guangda Ji,

Silvan Weder,

Francis Engelmann,

Marc Pollefeys,

Hermann Blum; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2025_CVPR,
    author    = {Ji, Guangda and Weder, Silvan and Engelmann, Francis and Pollefeys, Marc and Blum, Hermann},
    title     = {{ARKit} {LabelMaker}: A New Scale for Indoor {3D} Scene Understanding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {4398--4407},
}
StreamingT2V: Consistent, Dynamic, and Extendable Long Video Generation from Text: Roberto Henschel,

Levon Khachatryan,

Hayk Poghosyan,

Daniil Hayrapetyan,

Vahram Tadevosyan,

Zhangyang Wang,

Shant Navasardyan,

Humphrey Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Henschel_2025_CVPR,
    author    = {Henschel, Roberto and Khachatryan, Levon and Poghosyan, Hayk and Hayrapetyan, Daniil and Tadevosyan, Vahram and Wang, Zhangyang and Navasardyan, Shant and Shi, Humphrey},
    title     = {{StreamingT2V}: Consistent, Dynamic, and Extendable Long Video Generation from Text},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2568--2577},
}
AFL: A Single-Round Analytic Approach for Federated Learning with Pre-trained Models: Run He,

Kai Tong,

Di Fang,

Han Sun,

Ziqian Zeng,

Haoran Li,

Tianyi Chen,

Huiping Zhuang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_CVPR,
    author    = {He, Run and Tong, Kai and Fang, Di and Sun, Han and Zeng, Ziqian and Li, Haoran and Chen, Tianyi and Zhuang, Huiping},
    title     = {{AFL}: A Single-Round Analytic Approach for Federated Learning with Pre-trained Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {4988--4998},
}
BOLT: Boost Large Vision-Language Model Without Training for Long-form Video Understanding: Shuming Liu,

Chen Zhao,

Tianqi Xu,

Bernard Ghanem; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Shuming and Zhao, Chen and Xu, Tianqi and Ghanem, Bernard},
    title     = {{BOLT}: Boost Large Vision-Language Model Without Training for Long-form Video Understanding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3318--3327},
}
Reference-Based 3D-Aware Image Editing with Triplanes: Bahri Batuhan Bilecen,

Yigit Yalin,

Ning Yu,

Aysegul Dundar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bilecen_2025_CVPR,
    author    = {Bilecen, Bahri Batuhan and Yalin, Yigit and Yu, Ning and Dundar, Aysegul},
    title     = {Reference-Based {3D}-Aware Image Editing with Triplanes},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5904--5915},
}
One is Plenty: A Polymorphic Feature Interpreter for Immutable Heterogeneous Collaborative Perception: Yuchen Xia,

Quan Yuan,

Guiyang Luo,

Xiaoyuan Fu,

Yang Li,

Xuanhan Zhu,

Tianyou Luo,

Siheng Chen,

Jinglin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_CVPR,
    author    = {Xia, Yuchen and Yuan, Quan and Luo, Guiyang and Fu, Xiaoyuan and Li, Yang and Zhu, Xuanhan and Luo, Tianyou and Chen, Siheng and Li, Jinglin},
    title     = {One is Plenty: A Polymorphic Feature Interpreter for Immutable Heterogeneous Collaborative Perception},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1592--1601},
}
SegAgent: Exploring Pixel Understanding Capabilities in MLLMs by Imitating Human Annotator Trajectories: Muzhi Zhu,

Yuzhuo Tian,

Hao Chen,

Chunluan Zhou,

Qingpei Guo,

Yang Liu,

Ming Yang,

Chunhua Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
    author    = {Zhu, Muzhi and Tian, Yuzhuo and Chen, Hao and Zhou, Chunluan and Guo, Qingpei and Liu, Yang and Yang, Ming and Shen, Chunhua},
    title     = {{SegAgent}: Exploring Pixel Understanding Capabilities in {MLLMs} by Imitating Human Annotator Trajectories},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3686--3696},
}
SceneCrafter: Controllable Multi-View Driving Scene Editing: Zehao Zhu,

Yuliang Zou,

Chiyu Max Jiang,

Bo Sun,

Vincent Casser,

Xiukun Huang,

Jiahao Wang,

Zhenpei Yang,

Ruiqi Gao,

Leonidas Guibas,

Mingxing Tan,

Dragomir Anguelov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
    author    = {Zhu, Zehao and Zou, Yuliang and Jiang, Chiyu Max and Sun, Bo and Casser, Vincent and Huang, Xiukun and Wang, Jiahao and Yang, Zhenpei and Gao, Ruiqi and Guibas, Leonidas and Tan, Mingxing and Anguelov, Dragomir},
    title     = {{SceneCrafter}: Controllable Multi-View Driving Scene Editing},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6812--6822},
}
HeatFormer: A Neural Optimizer for Multiview Human Mesh Recovery: Yuto Matsubara,

Ko Nishino; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Matsubara_2025_CVPR,
    author    = {Matsubara, Yuto and Nishino, Ko},
    title     = {{HeatFormer}: A Neural Optimizer for Multiview Human Mesh Recovery},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6415--6424},
}
GPS as a Control Signal for Image Generation: Chao Feng,

Ziyang Chen,

Aleksander Holynski,

Alexei A. Efros,

Andrew Owens; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_CVPR,
    author    = {Feng, Chao and Chen, Ziyang and Holynski, Aleksander and Efros, Alexei A. and Owens, Andrew},
    title     = {{GPS} as a Control Signal for Image Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2766--2778},
}
CPath-Omni: A Unified Multimodal Foundation Model for Patch and Whole Slide Image Analysis in Computational Pathology: Yuxuan Sun,

Yixuan Si,

Chenglu Zhu,

Xuan Gong,

Kai Zhang,

Pingyi Chen,

Ye Zhang,

Zhongyi Shui,

Tao Lin,

Lin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
    author    = {Sun, Yuxuan and Si, Yixuan and Zhu, Chenglu and Gong, Xuan and Zhang, Kai and Chen, Pingyi and Zhang, Ye and Shui, Zhongyi and Lin, Tao and Yang, Lin},
    title     = {{CPath-Omni}: A Unified Multimodal Foundation Model for Patch and Whole Slide Image Analysis in Computational Pathology},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {10360--10371},
}
MAGiC-SLAM: Multi-Agent Gaussian Globally Consistent SLAM: Vladimir Yugay,

Theo Gevers,

Martin R. Oswald; [pdf]
[bibtex]
@InProceedings{Yugay_2025_CVPR,
    author    = {Yugay, Vladimir and Gevers, Theo and Oswald, Martin R.},
    title     = {{MAGiC-SLAM}: Multi-Agent {Gaussian} Globally Consistent {SLAM}},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6741--6750},
}
NTClick: Achieving Precise Interactive Segmentation With Noise-tolerant Clicks: Chenyi Zhang,

Ting Liu,

Xiaochao Qu,

Luoqi Liu,

Yao Zhao,

Yunchao Wei; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Chenyi and Liu, Ting and Qu, Xiaochao and Liu, Luoqi and Zhao, Yao and Wei, Yunchao},
    title     = {{NTClick}: Achieving Precise Interactive Segmentation With Noise-tolerant Clicks},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8921--8930},
}
MVGenMaster: Scaling Multi-View Generation from Any Image via 3D Priors Enhanced Diffusion Model: Chenjie Cao,

Chaohui Yu,

Shang Liu,

Fan Wang,

Xiangyang Xue,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_CVPR,
    author    = {Cao, Chenjie and Yu, Chaohui and Liu, Shang and Wang, Fan and Xue, Xiangyang and Fu, Yanwei},
    title     = {{MVGenMaster}: Scaling Multi-View Generation from Any Image via {3D} Priors Enhanced Diffusion Model},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6045--6056},
}
HiMoR: Monocular Deformable Gaussian Reconstruction with Hierarchical Motion Representation: Yiming Liang,

Tianhan Xu,

Yuta Kikuchi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
    author    = {Liang, Yiming and Xu, Tianhan and Kikuchi, Yuta},
    title     = {{HiMoR}: Monocular Deformable {Gaussian} Reconstruction with Hierarchical Motion Representation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {886--895},
}
Enhanced Visual-Semantic Interaction with Tailored Prompts for Pedestrian Attribute Recognition: Junyi Wu,

Yan Huang,

Min Gao,

Yuzhen Niu,

Yuzhong Chen,

Qiang Wu; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
    author    = {Wu, Junyi and Huang, Yan and Gao, Min and Niu, Yuzhen and Chen, Yuzhong and Wu, Qiang},
    title     = {Enhanced Visual-Semantic Interaction with Tailored Prompts for Pedestrian Attribute Recognition},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9570--9579},
}
HSI-GPT: A General-Purpose Large Scene-Motion-Language Model for Human Scene Interaction: Yuan Wang,

Yali Li,

Xiang Li,

Shengjin Wang; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Yuan and Li, Yali and Li, Xiang and Wang, Shengjin},
    title     = {{HSI-GPT}: A General-Purpose Large Scene-Motion-Language Model for Human Scene Interaction},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {7147--7157},
}
Vid2Avatar-Pro: Authentic Avatar from Videos in the Wild via Universal Prior: Chen Guo,

Junxuan Li,

Yash Kant,

Yaser Sheikh,

Shunsuke Saito,

Chen Cao; [pdf]
[bibtex]
@InProceedings{Guo_2025_CVPR,
    author    = {Guo, Chen and Li, Junxuan and Kant, Yash and Sheikh, Yaser and Saito, Shunsuke and Cao, Chen},
    title     = {{Vid2Avatar-Pro}: Authentic Avatar from Videos in the Wild via Universal Prior},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5559--5570},
}
RoomPainter: View-Integrated Diffusion for Consistent Indoor Scene Texturing: Zhipeng Huang,

Wangbo Yu,

Xinhua Cheng,

Chengshu Zhao,

Yunyang Ge,

Mingyi Guo,

Li Yuan,

Yonghong Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
    author    = {Huang, Zhipeng and Yu, Wangbo and Cheng, Xinhua and Zhao, Chengshu and Ge, Yunyang and Guo, Mingyi and Yuan, Li and Tian, Yonghong},
    title     = {{RoomPainter}: View-Integrated Diffusion for Consistent Indoor Scene Texturing},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {574--584},
}
IRIS: Inverse Rendering of Indoor Scenes from Low Dynamic Range Images: Chih-Hao Lin,

Jia-Bin Huang,

Zhengqin Li,

Zhao Dong,

Christian Richardt,

Tuotuo Li,

Michael Zollhöfer,

Johannes Kopf,

Shenlong Wang,

Changil Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
    author    = {Lin, Chih-Hao and Huang, Jia-Bin and Li, Zhengqin and Dong, Zhao and Richardt, Christian and Li, Tuotuo and Zollh{\"o}fer, Michael and Kopf, Johannes and Wang, Shenlong and Kim, Changil},
    title     = {{IRIS}: Inverse Rendering of Indoor Scenes from Low Dynamic Range Images},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {465--474},
}
RoGSplat: Learning Robust Generalizable Human Gaussian Splatting from Sparse Multi-View Images: Junjin Xiao,

Qing Zhang,

Yonewei Nie,

Lei Zhu,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
    author    = {Xiao, Junjin and Zhang, Qing and Nie, Yonewei and Zhu, Lei and Zheng, Wei-Shi},
    title     = {{RoGSplat}: Learning Robust Generalizable Human {Gaussian} Splatting from Sparse Multi-View Images},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5980--5990},
}
EnliveningGS: Active Locomotion of 3DGS: Siyuan Shen,

Tianjia Shao,

Kun Zhou,

Chenfanfu Jiang,

Yin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_CVPR,
    author    = {Shen, Siyuan and Shao, Tianjia and Zhou, Kun and Jiang, Chenfanfu and Yang, Yin},
    title     = {{EnliveningGS}: Active Locomotion of {3DGS}},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {896--905},
}
Knowledge-Aligned Counterfactual-Enhancement Diffusion Perception for Unsupervised Cross-Domain Visual Emotion Recognition: Wen Yin,

Yong Wang,

Guiduo Duan,

Dongyang Zhang,

Xin Hu,

Yuan-Fang Li,

Tao He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR,
    author    = {Yin, Wen and Wang, Yong and Duan, Guiduo and Zhang, Dongyang and Hu, Xin and Li, Yuan-Fang and He, Tao},
    title     = {Knowledge-Aligned Counterfactual-Enhancement Diffusion Perception for Unsupervised Cross-Domain Visual Emotion Recognition},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3888--3898},
}
LPOSS: Label Propagation Over Patches and Pixels for Open-vocabulary Semantic Segmentation: Vladan Stojnić,

Yannis Kalantidis,

Jiří Matas,

Giorgos Tolias; [pdf] [supp]
[bibtex]
@InProceedings{Stojnic_2025_CVPR,
    author    = {Stojni{\'c}, Vladan and Kalantidis, Yannis and Matas, Ji{\v{r}}{\'\i} and Tolias, Giorgos},
    title     = {{LPOSS}: Label Propagation Over Patches and Pixels for Open-vocabulary Semantic Segmentation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9794--9803},
}
PhysGen3D: Crafting a Miniature Interactive World from a Single Image: Boyuan Chen,

Hanxiao Jiang,

Shaowei Liu,

Saurabh Gupta,

Yunzhu Li,

Hao Zhao,

Shenlong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Boyuan and Jiang, Hanxiao and Liu, Shaowei and Gupta, Saurabh and Li, Yunzhu and Zhao, Hao and Wang, Shenlong},
    title     = {{PhysGen3D}: Crafting a Miniature Interactive World from a Single Image},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6178--6189},
}
Docopilot: Improving Multimodal Models for Document-Level Understanding: Yuchen Duan,

Zhe Chen,

Yusong Hu,

Weiyun Wang,

Shenglong Ye,

Botian Shi,

Lewei Lu,

Qibin Hou,

Tong Lu,

Hongsheng Li,

Jifeng Dai,

Wenhai Wang; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2025_CVPR,
    author    = {Duan, Yuchen and Chen, Zhe and Hu, Yusong and Wang, Weiyun and Ye, Shenglong and Shi, Botian and Lu, Lewei and Hou, Qibin and Lu, Tong and Li, Hongsheng and Dai, Jifeng and Wang, Wenhai},
    title     = {Docopilot: Improving Multimodal Models for Document-Level Understanding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {4026--4037},
}
Self-supervised ControlNet with Spatio-Temporal Mamba for Real-world Video Super-resolution: Shijun Shi,

Jing Xu,

Lijing Lu,

Zhihang Li,

Kai Hu; [pdf] [arXiv]
[bibtex]
@InProceedings{Shi_2025_CVPR,
    author    = {Shi, Shijun and Xu, Jing and Lu, Lijing and Li, Zhihang and Hu, Kai},
    title     = {Self-supervised {ControlNet} with Spatio-Temporal {Mamba} for Real-world Video Super-resolution},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {7385--7395},
}
LATTE-MV: Learning to Anticipate Table Tennis Hits from Monocular Videos: Daniel Etaat,

Dvij Kalaria,

Nima Rahmanian,

S. Shankar Sastry; [pdf] [supp]
[bibtex]
@InProceedings{Etaat_2025_CVPR,
    author    = {Etaat, Daniel and Kalaria, Dvij and Rahmanian, Nima and Sastry, S. Shankar},
    title     = {{LATTE-MV}: Learning to Anticipate Table Tennis Hits from Monocular Videos},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {7115--7124},
}
Distilling Spatially-Heterogeneous Distortion Perception for Blind Image Quality Assessment: Xudong Li,

Wenjie Nie,

Yan Zhang,

Runze Hu,

Ke Li,

Xiawu Zheng,

Liujuan Cao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Xudong and Nie, Wenjie and Zhang, Yan and Hu, Runze and Li, Ke and Zheng, Xiawu and Cao, Liujuan},
    title     = {Distilling Spatially-Heterogeneous Distortion Perception for Blind Image Quality Assessment},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2344--2354},
}
2DMamba: Efficient State Space Model for Image Representation with Applications on Giga-Pixel Whole Slide Image Classification: Jingwei Zhang,

Anh Tien Nguyen,

Xi Han,

Vincent Quoc-Huy Trinh,

Hong Qin,

Dimitris Samaras,

Mahdi S. Hosseini; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Jingwei and Nguyen, Anh Tien and Han, Xi and Trinh, Vincent Quoc-Huy and Qin, Hong and Samaras, Dimitris and Hosseini, Mahdi S.},
    title     = {{2DMamba}: Efficient State Space Model for Image Representation with Applications on Giga-Pixel Whole Slide Image Classification},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3583--3592},
}
Unboxed: Geometrically and Temporally Consistent Video Outpainting: Zhongrui Yu,

Martina Megaro-Boldini,

Robert W. Sumner,

Abdelaziz Djelouah; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
    author    = {Yu, Zhongrui and Megaro-Boldini, Martina and Sumner, Robert W. and Djelouah, Abdelaziz},
    title     = {Unboxed: Geometrically and Temporally Consistent Video Outpainting},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {7309--7319},
}
K-Sort Arena: Efficient and Reliable Benchmarking for Generative Models via K-wise Human Preferences: Zhikai Li,

Xuewen Liu,

Dongrong Joe Fu,

Jianquan Li,

Qingyi Gu,

Kurt Keutzer,

Zhen Dong; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Zhikai and Liu, Xuewen and Fu, Dongrong Joe and Li, Jianquan and Gu, Qingyi and Keutzer, Kurt and Dong, Zhen},
    title     = {{K-Sort Arena}: Efficient and Reliable Benchmarking for Generative Models via {K}-wise Human Preferences},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9131--9141},
}
Dense-SfM: Structure from Motion with Dense Consistent Matching: JongMin Lee,

Sungjoo Yoo; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_CVPR,
    author    = {Lee, JongMin and Yoo, Sungjoo},
    title     = {{Dense-SfM}: Structure from Motion with Dense Consistent Matching},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6404--6414},
}
Sketchy Bounding-box Supervision for 3D Instance Segmentation: Qian Deng,

Le Hui,

Jin Xie,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2025_CVPR,
    author    = {Deng, Qian and Hui, Le and Xie, Jin and Yang, Jian},
    title     = {Sketchy Bounding-box Supervision for {3D} Instance Segmentation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8879--8888},
}
StreetCrafter: Street View Synthesis with Controllable Video Diffusion Models: Yunzhi Yan,

Zhen Xu,

Haotong Lin,

Haian Jin,

Haoyu Guo,

Yida Wang,

Kun Zhan,

Xianpeng Lang,

Hujun Bao,

Xiaowei Zhou,

Sida Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
    author    = {Yan, Yunzhi and Xu, Zhen and Lin, Haotong and Jin, Haian and Guo, Haoyu and Wang, Yida and Zhan, Kun and Lang, Xianpeng and Bao, Hujun and Zhou, Xiaowei and Peng, Sida},
    title     = {{StreetCrafter}: Street View Synthesis with Controllable Video Diffusion Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {822--832},
}
Learning Person-Specific Animatable Face Models from In-the-Wild Images via a Shared Base Model: Yuxiang Mao,

Zhenfeng Fan,

ZhiJie Zhang,

Zhiheng Zhang,

Shihong Xia; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2025_CVPR,
    author    = {Mao, Yuxiang and Fan, Zhenfeng and Zhang, ZhiJie and Zhang, Zhiheng and Xia, Shihong},
    title     = {Learning Person-Specific Animatable Face Models from In-the-Wild Images via a Shared Base Model},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5602--5613},
}
TIMotion: Temporal and Interactive Framework for Efficient Human-Human Motion Generation: Yabiao Wang,

Shuo Wang,

Jiangning Zhang,

Ke Fan,

Jiafu Wu,

Zhucun Xue,

Yong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Yabiao and Wang, Shuo and Zhang, Jiangning and Fan, Ke and Wu, Jiafu and Xue, Zhucun and Liu, Yong},
    title     = {{TIMotion}: Temporal and Interactive Framework for Efficient Human-Human Motion Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {7169--7178},
}
Hybrid Reciprocal Transformer with Triplet Feature Alignment for Scene Graph Generation: Jiawei Fu,

Tiantian Zhang,

Kai Chen,

Qi Dou; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2025_CVPR,
    author    = {Fu, Jiawei and Zhang, Tiantian and Chen, Kai and Dou, Qi},
    title     = {Hybrid Reciprocal Transformer with Triplet Feature Alignment for Scene Graph Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8953--8963},
}
Gradient Inversion Attacks on Parameter-Efficient Fine-Tuning: Hasin Us Sami,

Swapneel Sen,

Amit K. Roy-Chowdhury,

Srikanth V. Krishnamurthy,

Basak Guler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sami_2025_CVPR,
    author    = {Sami, Hasin Us and Sen, Swapneel and Roy-Chowdhury, Amit K. and Krishnamurthy, Srikanth V. and Guler, Basak},
    title     = {Gradient Inversion Attacks on Parameter-Efficient Fine-Tuning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {10224--10234},
}
UPME: An Unsupervised Peer Review Framework for Multimodal Large Language Model Evaluation: Qihui Zhang,

Munan Ning,

Zheyuan Liu,

Yue Huang,

Shuo Yang,

Yanbo Wang,

Jiayi Ye,

Xiao Chen,

Yibing Song,

Li Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Qihui and Ning, Munan and Liu, Zheyuan and Huang, Yue and Yang, Shuo and Wang, Yanbo and Ye, Jiayi and Chen, Xiao and Song, Yibing and Yuan, Li},
    title     = {{UPME}: An Unsupervised Peer Review Framework for Multimodal Large Language Model Evaluation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9165--9174},
}
MBQ: Modality-Balanced Quantization for Large Vision-Language Models: Shiyao Li,

Yingchun Hu,

Xuefei Ning,

Xihui Liu,

Ke Hong,

Xiaotao Jia,

Xiuhong Li,

Yaqi Yan,

Pei Ran,

Guohao Dai,

Shengen Yan,

Huazhong Yang,

Yu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Shiyao and Hu, Yingchun and Ning, Xuefei and Liu, Xihui and Hong, Ke and Jia, Xiaotao and Li, Xiuhong and Yan, Yaqi and Ran, Pei and Dai, Guohao and Yan, Shengen and Yang, Huazhong and Wang, Yu},
    title     = {{MBQ}: Modality-Balanced Quantization for Large Vision-Language Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {4167--4177},
}
VideoDPO: Omni-Preference Alignment for Video Diffusion Generation: Runtao Liu,

Haoyu Wu,

Ziqiang Zheng,

Chen Wei,

Yingqing He,

Renjie Pi,

Qifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Runtao and Wu, Haoyu and Zheng, Ziqiang and Wei, Chen and He, Yingqing and Pi, Renjie and Chen, Qifeng},
    title     = {{VideoDPO}: Omni-Preference Alignment for Video Diffusion Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8009--8019},
}
Associative Transformer: Yuwei Sun,

Hideya Ochiai,

Zhirong Wu,

Stephen Lin,

Ryota Kanai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
    author    = {Sun, Yuwei and Ochiai, Hideya and Wu, Zhirong and Lin, Stephen and Kanai, Ryota},
    title     = {Associative Transformer},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {4518--4527},
}
ChatGarment: Garment Estimation, Generation and Editing via Large Language Models: Siyuan Bian,

Chenghao Xu,

Yuliang Xiu,

Artur Grigorev,

Zhen Liu,

Cewu Lu,

Michael J. Black,

Yao Feng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bian_2025_CVPR,
    author    = {Bian, Siyuan and Xu, Chenghao and Xiu, Yuliang and Grigorev, Artur and Liu, Zhen and Lu, Cewu and Black, Michael J. and Feng, Yao},
    title     = {{ChatGarment}: Garment Estimation, Generation and Editing via Large Language Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2924--2934},
}
RDD: Robust Feature Detector and Descriptor using Deformable Transformer: Gonglin Chen,

Tianwen Fu,

Haiwei Chen,

Wenbin Teng,

Hanyuan Xiao,

Yajie Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Gonglin and Fu, Tianwen and Chen, Haiwei and Teng, Wenbin and Xiao, Hanyuan and Zhao, Yajie},
    title     = {{RDD}: Robust Feature Detector and Descriptor using Deformable Transformer},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6394--6403},
}
Building Vision Models upon Heat Conduction: Zhaozhi Wang,

Yue Liu,

Yunjie Tian,

Yunfan Liu,

Yaowei Wang,

Qixiang Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Zhaozhi and Liu, Yue and Tian, Yunjie and Liu, Yunfan and Wang, Yaowei and Ye, Qixiang},
    title     = {Building Vision Models upon Heat Conduction},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9707--9717},
}
LT3SD: Latent Trees for 3D Scene Diffusion: Quan Meng,

Lei Li,

Matthias Nießner,

Angela Dai; [pdf] [supp]
[bibtex]
@InProceedings{Meng_2025_CVPR,
  author    = {Meng, Quan and Li, Lei and Nie{\ss}ner, Matthias and Dai, Angela},
  title     = {{LT3SD}: Latent Trees for {3D} Scene Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {650--660},
}
CraftsMan3D: High-fidelity Mesh Generation with 3D Native Diffusion and Interactive Geometry Refiner: Weiyu Li,

Jiarui Liu,

Hongyu Yan,

Rui Chen,

Yixun Liang,

Xuelin Chen,

Ping Tan,

Xiaoxiao Long; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Weiyu and Liu, Jiarui and Yan, Hongyu and Chen, Rui and Liang, Yixun and Chen, Xuelin and Tan, Ping and Long, Xiaoxiao},
  title     = {{CraftsMan3D}: High-fidelity Mesh Generation with {3D} Native Diffusion and Interactive Geometry Refiner},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5307--5317},
}
GIF: Generative Inspiration for Face Recognition at Scale: Saeed Ebrahimi,

Sahar Rahimi,

Ali Dabouei,

Srinjoy Das,

Jeremy M. Dawson,

Nasser M. Nasrabadi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ebrahimi_2025_CVPR,
  author    = {Ebrahimi, Saeed and Rahimi, Sahar and Dabouei, Ali and Das, Srinjoy and Dawson, Jeremy M. and Nasrabadi, Nasser M.},
  title     = {{GIF}: Generative Inspiration for Face Recognition at Scale},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3528--3539},
}
SKDream: Controllable Multi-view and 3D Generation with Arbitrary Skeletons: Yuanyou Xu,

Zongxin Yang,

Yi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Yuanyou and Yang, Zongxin and Yang, Yi},
  title     = {{SKDream}: Controllable Multi-view and {3D} Generation with Arbitrary Skeletons},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {314--325},
}
Optimus-2: Multimodal Minecraft Agent with Goal-Observation-Action Conditioned Policy: Zaijing Li,

Yuquan Xie,

Rui Shao,

Gongwei Chen,

Dongmei Jiang,

Liqiang Nie; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zaijing and Xie, Yuquan and Shao, Rui and Chen, Gongwei and Jiang, Dongmei and Nie, Liqiang},
  title     = {{Optimus-2}: Multimodal {Minecraft} Agent with Goal-Observation-Action Conditioned Policy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9039--9049},
}
Classic Video Denoising in a Machine Learning World: Robust, Fast, and Controllable: Xin Jin,

Simon Niklaus,

Zhoutong Zhang,

Zhihao Xia,

Chunle Guo,

Yuting Yang,

Jiawen Chen,

Chongyi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2025_CVPR,
  author    = {Jin, Xin and Niklaus, Simon and Zhang, Zhoutong and Xia, Zhihao and Guo, Chunle and Yang, Yuting and Chen, Jiawen and Li, Chongyi},
  title     = {Classic Video Denoising in a Machine Learning World: Robust, Fast, and Controllable},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2084--2093},
}
Population Normalization for Federated Learning: Zhuoyao Wang,

Fan Yi,

Peizhu Gong,

Caitou He,

Cheng Jin,

Weizhong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhuoyao and Yi, Fan and Gong, Peizhu and He, Caitou and Jin, Cheng and Zhang, Weizhong},
  title     = {Population Normalization for Federated Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10214--10223},
}
RipVIS: Rip Currents Video Instance Segmentation Benchmark for Beach Monitoring and Safety: Andrei Dumitriu,

Florin Tatui,

Florin Miron,

Aakash Ralhan,

Radu Tudor Ionescu,

Radu Timofte; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dumitriu_2025_CVPR,
  author    = {Dumitriu, Andrei and Tatui, Florin and Miron, Florin and Ralhan, Aakash and Ionescu, Radu Tudor and Timofte, Radu},
  title     = {{RipVIS}: Rip Currents Video Instance Segmentation Benchmark for Beach Monitoring and Safety},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3427--3437},
}
ESCAPE: Equivariant Shape Completion via Anchor Point Encoding: Burak Bekci,

Nassir Navab,

Federico Tombari,

Mahdi Saleh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bekci_2025_CVPR,
  author    = {Bekci, Burak and Navab, Nassir and Tombari, Federico and Saleh, Mahdi},
  title     = {{ESCAPE}: Equivariant Shape Completion via Anchor Point Encoding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6480--6489},
}
Satellite to GroundScape - Large-scale Consistent Ground View Generation from Satellite Views: Ningli Xu,

Rongjun Qin; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Ningli and Qin, Rongjun},
  title     = {Satellite to {GroundScape} - Large-scale Consistent Ground View Generation from Satellite Views},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6068--6077},
}
Variance-Based Membership Inference Attacks Against Large-Scale Image Captioning Models: Daniel Samira,

Edan Habler,

Yuval Elovici,

Asaf Shabtai; [pdf] [supp]
[bibtex]
@InProceedings{Samira_2025_CVPR,
  author    = {Samira, Daniel and Habler, Edan and Elovici, Yuval and Shabtai, Asaf},
  title     = {Variance-Based Membership Inference Attacks Against Large-Scale Image Captioning Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9210--9219},
}
Learning Dynamic Collaborative Network for Semi-supervised 3D Vessel Segmentation: Jiao Xu,

Xin Chen,

Lihe Zhang; [pdf]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Jiao and Chen, Xin and Zhang, Lihe},
  title     = {Learning Dynamic Collaborative Network for Semi-supervised {3D} Vessel Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10445--10454},
}
Temporal Alignment-Free Video Matching for Few-shot Action Recognition: SuBeen Lee,

WonJun Moon,

Hyun Seok Seong,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, SuBeen and Moon, WonJun and Seong, Hyun Seok and Heo, Jae-Pil},
  title     = {Temporal Alignment-Free Video Matching for Few-shot Action Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5412--5421},
}
OSLoPrompt: Bridging Low-Supervision Challenges and Open-Set Domain Generalization in CLIP: Mohamad Hassan N C,

Divyam Gupta,

Mainak Singha,

Sai Bhargav Rongali,

Ankit Jha,

Muhammad Haris Khan,

Biplab Banerjee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{C_2025_CVPR,
  author    = {C, Mohamad Hassan N and Gupta, Divyam and Singha, Mainak and Rongali, Sai Bhargav and Jha, Ankit and Khan, Muhammad Haris and Banerjee, Biplab},
  title     = {{OSLoPrompt}: Bridging Low-Supervision Challenges and Open-Set Domain Generalization in {CLIP}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10110--10120},
}
VLog: Video-Language Models by Generative Retrieval of Narration Vocabulary: Kevin Qinghong Lin,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Kevin Qinghong and Shou, Mike Zheng},
  title     = {{VLog}: Video-Language Models by Generative Retrieval of Narration Vocabulary},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3218--3228},
}
Forensics-Bench: A Comprehensive Forgery Detection Benchmark Suite for Large Vision Language Models: Jin Wang,

Chenghui Lv,

Xian Li,

Shichao Dong,

Huadong Li,

Kelu Yao,

Chao Li,

Wenqi Shao,

Ping Luo; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jin and Lv, Chenghui and Li, Xian and Dong, Shichao and Li, Huadong and Yao, Kelu and Li, Chao and Shao, Wenqi and Luo, Ping},
  title     = {{Forensics-Bench}: A Comprehensive Forgery Detection Benchmark Suite for Large Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4233--4245},
}
Forensic Self-Descriptions Are All You Need for Zero-Shot Detection, Open-Set Source Attribution, and Clustering of AI-generated Images: Tai D. Nguyen,

Aref Azizpour,

Matthew C. Stamm; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Tai D. and Azizpour, Aref and Stamm, Matthew C.},
  title     = {Forensic Self-Descriptions Are All You Need for Zero-Shot Detection, Open-Set Source Attribution, and Clustering of {AI}-generated Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3040--3050},
}
FlexDrive: Toward Trajectory Flexibility in Driving Scene Gaussian Splatting Reconstruction and Rendering: Jingqiu Zhou,

Lue Fan,

Linjiang Huang,

Xiaoyu Shi,

Si Liu,

Zhaoxiang Zhang,

Hongsheng Li; [pdf]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Jingqiu and Fan, Lue and Huang, Linjiang and Shi, Xiaoyu and Liu, Si and Zhang, Zhaoxiang and Li, Hongsheng},
  title     = {{FlexDrive}: Toward Trajectory Flexibility in Driving Scene {Gaussian} Splatting Reconstruction and Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1549--1558},
}
Taming Video Diffusion Prior with Scene-Grounding Guidance for 3D Gaussian Splatting from Sparse Inputs: Yingji Zhong,

Zhihao Li,

Dave Zhenyu Chen,

Lanqing Hong,

Dan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_CVPR,
  author    = {Zhong, Yingji and Li, Zhihao and Chen, Dave Zhenyu and Hong, Lanqing and Xu, Dan},
  title     = {Taming Video Diffusion Prior with Scene-Grounding Guidance for {3D} {Gaussian} Splatting from Sparse Inputs},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6133--6143},
}
Augmenting Multimodal LLMs with Self-Reflective Tokens for Knowledge-based Visual Question Answering: Federico Cocchi,

Nicholas Moratelli,

Marcella Cornia,

Lorenzo Baraldi,

Rita Cucchiara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cocchi_2025_CVPR,
  author    = {Cocchi, Federico and Moratelli, Nicholas and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita},
  title     = {Augmenting Multimodal {LLMs} with Self-Reflective Tokens for Knowledge-based Visual Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9199--9209},
}
DifIISR: A Diffusion Model with Gradient Guidance for Infrared Image Super-Resolution: Xingyuan Li,

Zirui Wang,

Yang Zou,

Zhixin Chen,

Jun Ma,

Zhiying Jiang,

Long Ma,

Jinyuan Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xingyuan and Wang, Zirui and Zou, Yang and Chen, Zhixin and Ma, Jun and Jiang, Zhiying and Ma, Long and Liu, Jinyuan},
  title     = {{DifIISR}: A Diffusion Model with Gradient Guidance for Infrared Image Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7534--7544},
}
3D-GSW: 3D Gaussian Splatting for Robust Watermarking: Youngdong Jang,

Hyunje Park,

Feng Yang,

Heeju Ko,

Euijin Choo,

Sangpil Kim; [pdf] [supp]
[bibtex]
@InProceedings{Jang_2025_CVPR,
  author    = {Jang, Youngdong and Park, Hyunje and Yang, Feng and Ko, Heeju and Choo, Euijin and Kim, Sangpil},
  title     = {{3D-GSW}: {3D} {Gaussian} Splatting for Robust Watermarking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5938--5948},
}
OpenING: A Comprehensive Benchmark for Judging Open-ended Interleaved Image-Text Generation: Pengfei Zhou,

Xiaopeng Peng,

Jiajun Song,

Chuanhao Li,

Zhaopan Xu,

Yue Yang,

Ziyao Guo,

Hao Zhang,

Yuqi Lin,

Yefei He,

Lirui Zhao,

Shuo Liu,

Tianhua Li,

Yuxuan Xie,

Xiaojun Chang,

Yu Qiao,

Wenqi Shao,

Kaipeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Pengfei and Peng, Xiaopeng and Song, Jiajun and Li, Chuanhao and Xu, Zhaopan and Yang, Yue and Guo, Ziyao and Zhang, Hao and Lin, Yuqi and He, Yefei and Zhao, Lirui and Liu, Shuo and Li, Tianhua and Xie, Yuxuan and Chang, Xiaojun and Qiao, Yu and Shao, Wenqi and Zhang, Kaipeng},
  title     = {{OpenING}: A Comprehensive Benchmark for Judging Open-ended Interleaved Image-Text Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {56--66},
}
Dual Exposure Stereo for Extended Dynamic Range 3D Imaging: Juhyung Choi,

Jinnyeong Kim,

Seokjun Choi,

Jinwoo Lee,

Samuel Brucker,

Mario Bijelic,

Felix Heide,

Seung-Hwan Baek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2025_CVPR,
  author    = {Choi, Juhyung and Kim, Jinnyeong and Choi, Seokjun and Lee, Jinwoo and Brucker, Samuel and Bijelic, Mario and Heide, Felix and Baek, Seung-Hwan},
  title     = {Dual Exposure Stereo for Extended Dynamic Range {3D} Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6283--6293},
}
PAVE: Patching and Adapting Video Large Language Models: Zhuoming Liu,

Yiquan Li,

Khoi Duc Nguyen,

Yiwu Zhong,

Yin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhuoming and Li, Yiquan and Nguyen, Khoi Duc and Zhong, Yiwu and Li, Yin},
  title     = {{PAVE}: Patching and Adapting Video Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3306--3317},
}
Generative Image Layer Decomposition with Visual Effects: Jinrui Yang,

Qing Liu,

Yijun Li,

Soo Ye Kim,

Daniil Pakhomov,

Mengwei Ren,

Jianming Zhang,

Zhe Lin,

Cihang Xie,

Yuyin Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jinrui and Liu, Qing and Li, Yijun and Kim, Soo Ye and Pakhomov, Daniil and Ren, Mengwei and Zhang, Jianming and Lin, Zhe and Xie, Cihang and Zhou, Yuyin},
  title     = {Generative Image Layer Decomposition with Visual Effects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7643--7653},
}
AR-Diffusion: Asynchronous Video Generation with Auto-Regressive Diffusion: Mingzhen Sun,

Weining Wang,

Gen Li,

Jiawei Liu,

Jiahui Sun,

Wanquan Feng,

Shanshan Lao,

Siyu Zhou,

Qian He,

Jing Liu; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Mingzhen and Wang, Weining and Li, Gen and Liu, Jiawei and Sun, Jiahui and Feng, Wanquan and Lao, Shanshan and Zhou, Siyu and He, Qian and Liu, Jing},
  title     = {{AR-Diffusion}: Asynchronous Video Generation with Auto-Regressive Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7364--7373},
}
Revisiting Audio-Visual Segmentation with Vision-Centric Transformer: Shaofei Huang,

Rui Ling,

Tianrui Hui,

Hongyu Li,

Xu Zhou,

Shifeng Zhang,

Si Liu,

Richang Hong,

Meng Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Shaofei and Ling, Rui and Hui, Tianrui and Li, Hongyu and Zhou, Xu and Zhang, Shifeng and Liu, Si and Hong, Richang and Wang, Meng},
  title     = {Revisiting Audio-Visual Segmentation with Vision-Centric Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8352--8361},
}
HOIGPT: Learning Long-Sequence Hand-Object Interaction with Language Models: Mingzhen Huang,

Fu-Jen Chu,

Bugra Tekin,

Kevin J. Liang,

Haoyu Ma,

Weiyao Wang,

Xingyu Chen,

Pierre Gleize,

Hongfei Xue,

Siwei Lyu,

Kris Kitani,

Matt Feiszli,

Hao Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Mingzhen and Chu, Fu-Jen and Tekin, Bugra and Liang, Kevin J. and Ma, Haoyu and Wang, Weiyao and Chen, Xingyu and Gleize, Pierre and Xue, Hongfei and Lyu, Siwei and Kitani, Kris and Feiszli, Matt and Tang, Hao},
  title     = {{HOIGPT}: Learning Long-Sequence Hand-Object Interaction with Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7136--7146},
}
Taxonomy-Aware Evaluation of Vision-Language Models: Vésteinn Snæbjarnarson,

Kevin Du,

Niklas Stoehr,

Serge Belongie,

Ryan Cotterell,

Nico Lang,

Stella Frank; [pdf] [supp]
[bibtex]
@InProceedings{Snaebjarnarson_2025_CVPR,
  author    = {Sn{\ae}bjarnarson, V{\'e}steinn and Du, Kevin and Stoehr, Niklas and Belongie, Serge and Cotterell, Ryan and Lang, Nico and Frank, Stella},
  title     = {Taxonomy-Aware Evaluation of Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9109--9120},
}
Active Event-based Stereo Vision: Jianing Li,

Yunjian Zhang,

Haiqian Han,

Xiangyang Ji; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jianing and Zhang, Yunjian and Han, Haiqian and Ji, Xiangyang},
  title     = {Active Event-based Stereo Vision},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {971--981},
}
SMILE: Infusing Spatial and Motion Semantics in Masked Video Learning: Fida Mohammad Thoker,

Letian Jiang,

Chen Zhao,

Bernard Ghanem; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thoker_2025_CVPR,
  author    = {Thoker, Fida Mohammad and Jiang, Letian and Zhao, Chen and Ghanem, Bernard},
  title     = {{SMILE}: Infusing Spatial and Motion Semantics in Masked Video Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8438--8449},
}
Video Language Model Pretraining with Spatio-temporal Masking: Yue Wu,

Zhaobo Qi,

Junshu Sun,

Yaowei Wang,

Qingming Huang,

Shuhui Wang; [pdf]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Yue and Qi, Zhaobo and Sun, Junshu and Wang, Yaowei and Huang, Qingming and Wang, Shuhui},
  title     = {Video Language Model Pretraining with Spatio-temporal Masking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8557--8567},
}
Synthetic Data is an Elegant GIFT for Continual Vision-Language Models: Bin Wu,

Wuxuan Shi,

Jinqiao Wang,

Mang Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Bin and Shi, Wuxuan and Wang, Jinqiao and Ye, Mang},
  title     = {Synthetic Data is an Elegant {GIFT} for Continual Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2813--2823},
}
Timestep Embedding Tells: It's Time to Cache for Video Diffusion Model: Feng Liu,

Shiwei Zhang,

Xiaofeng Wang,

Yujie Wei,

Haonan Qiu,

Yuzhong Zhao,

Yingya Zhang,

Qixiang Ye,

Fang Wan; [pdf]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Feng and Zhang, Shiwei and Wang, Xiaofeng and Wei, Yujie and Qiu, Haonan and Zhao, Yuzhong and Zhang, Yingya and Ye, Qixiang and Wan, Fang},
  title     = {Timestep Embedding Tells: It's Time to Cache for Video Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7353--7363},
}
Hypergraph Vision Transformers: Images are More than Nodes, More than Edges: Joshua Fixelle; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fixelle_2025_CVPR,
  author    = {Fixelle, Joshua},
  title     = {Hypergraph Vision Transformers: Images are More than Nodes, More than Edges},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9751--9761},
}
Binarized Neural Network for Multi-spectral Image Fusion: Junming Hou,

Xiaoyu Chen,

Ran Ran,

Xiaofeng Cong,

Xinyang Liu,

Jian Wei You,

Liang-Jian Deng; [pdf] [supp]
[bibtex]
@InProceedings{Hou_2025_CVPR,
  author    = {Hou, Junming and Chen, Xiaoyu and Ran, Ran and Cong, Xiaofeng and Liu, Xinyang and You, Jian Wei and Deng, Liang-Jian},
  title     = {Binarized Neural Network for Multi-spectral Image Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2236--2245},
}
GaussianIP: Identity-Preserving Realistic 3D Human Generation via Human-Centric Diffusion Prior: Zichen Tang,

Yuan Yao,

Miaomiao Cui,

Liefeng Bo,

Hongyu Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Zichen and Yao, Yuan and Cui, Miaomiao and Bo, Liefeng and Yang, Hongyu},
  title     = {{GaussianIP}: Identity-Preserving Realistic {3D} Human Generation via Human-Centric Diffusion Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {348--358},
}
FineVQ: Fine-Grained User Generated Content Video Quality Assessment: Huiyu Duan,

Qiang Hu,

Jiarui Wang,

Liu Yang,

Zitong Xu,

Lu Liu,

Xiongkuo Min,

Chunlei Cai,

Tianxiao Ye,

Xiaoyun Zhang,

Guangtao Zhai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Duan_2025_CVPR,
  author    = {Duan, Huiyu and Hu, Qiang and Wang, Jiarui and Yang, Liu and Xu, Zitong and Liu, Lu and Min, Xiongkuo and Cai, Chunlei and Ye, Tianxiao and Zhang, Xiaoyun and Zhai, Guangtao},
  title     = {{FineVQ}: Fine-Grained User Generated Content Video Quality Assessment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3206--3217},
}
Unveiling the Ignorance of MLLMs: Seeing Clearly, Answering Incorrectly: Yexin Liu,

Zhengyang Liang,

Yueze Wang,

Xianfeng Wu,

Feilong Tang,

Muyang He,

Jian Li,

Zheng Liu,

Harry Yang,

Sernam Lim,

Bo Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yexin and Liang, Zhengyang and Wang, Yueze and Wu, Xianfeng and Tang, Feilong and He, Muyang and Li, Jian and Liu, Zheng and Yang, Harry and Lim, Sernam and Zhao, Bo},
  title     = {Unveiling the Ignorance of {MLLMs}: Seeing Clearly, Answering Incorrectly},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9087--9097},
}
MANTA: Diffusion Mamba for Efficient and Effective Stochastic Long-Term Dense Action Anticipation: Olga Zatsarynna,

Emad Bahrami,

Yazan Abu Farha,

Gianpiero Francesca,

Juergen Gall; [pdf] [supp]
[bibtex]
@InProceedings{Zatsarynna_2025_CVPR,
  author    = {Zatsarynna, Olga and Bahrami, Emad and Abu Farha, Yazan and Francesca, Gianpiero and Gall, Juergen},
  title     = {{MANTA}: Diffusion {Mamba} for Efficient and Effective Stochastic Long-Term Dense Action Anticipation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3438--3448},
}
METASCENES: Towards Automated Replica Creation for Real-world 3D Scans: Huangyue Yu,

Baoxiong Jia,

Yixin Chen,

Yandan Yang,

Puhao Li,

Rongpeng Su,

Jiaxin Li,

Qing Li,

Wei Liang,

Song-Chun Zhu,

Tengyu Liu,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Huangyue and Jia, Baoxiong and Chen, Yixin and Yang, Yandan and Li, Puhao and Su, Rongpeng and Li, Jiaxin and Li, Qing and Liang, Wei and Zhu, Song-Chun and Liu, Tengyu and Huang, Siyuan},
  title     = {{METASCENES}: Towards Automated Replica Creation for Real-world {3D} Scans},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1667--1679},
}
Robust Multimodal Survival Prediction with Conditional Latent Differentiation Variational AutoEncoder: Junjie Zhou,

Jiao Tang,

Yingli Zuo,

Peng Wan,

Daoqiang Zhang,

Wei Shao; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Junjie and Tang, Jiao and Zuo, Yingli and Wan, Peng and Zhang, Daoqiang and Shao, Wei},
  title     = {Robust Multimodal Survival Prediction with Conditional Latent Differentiation Variational {AutoEncoder}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10384--10393},
}
Zero-Shot Blind-spot Image Denoising via Implicit Neural Sampling: Yuhui Quan,

Tianxiang Zheng,

Zhiyuan Ma,

Hui Ji; [pdf] [supp]
[bibtex]
@InProceedings{Quan_2025_CVPR,
  author    = {Quan, Yuhui and Zheng, Tianxiang and Ma, Zhiyuan and Ji, Hui},
  title     = {Zero-Shot Blind-spot Image Denoising via Implicit Neural Sampling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7502--7512},
}
Track4Gen: Teaching Video Diffusion Models to Track Points Improves Video Generation: Hyeonho Jeong,

Chun-Hao P. Huang,

Jong Chul Ye,

Niloy J. Mitra,

Duygu Ceylan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_CVPR,
  author    = {Jeong, Hyeonho and Huang, Chun-Hao P. and Ye, Jong Chul and Mitra, Niloy J. and Ceylan, Duygu},
  title     = {{Track4Gen}: Teaching Video Diffusion Models to Track Points Improves Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7276--7287},
}
InteractAnything: Zero-shot Human Object Interaction Synthesis via LLM Feedback and Object Affordance Parsing: Jinlu Zhang,

Yixin Chen,

Zan Wang,

Jie Yang,

Yizhou Wang,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jinlu and Chen, Yixin and Wang, Zan and Yang, Jie and Wang, Yizhou and Huang, Siyuan},
  title     = {{InteractAnything}: Zero-shot Human Object Interaction Synthesis via {LLM} Feedback and Object Affordance Parsing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7015--7025},
}
Wonderland: Navigating 3D Scenes from a Single Image: Hanwen Liang,

Junli Cao,

Vidit Goel,

Guocheng Qian,

Sergei Korolev,

Demetri Terzopoulos,

Konstantinos N. Plataniotis,

Sergey Tulyakov,

Jian Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Hanwen and Cao, Junli and Goel, Vidit and Qian, Guocheng and Korolev, Sergei and Terzopoulos, Demetri and Plataniotis, Konstantinos N. and Tulyakov, Sergey and Ren, Jian},
  title     = {Wonderland: Navigating {3D} Scenes from a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {798--810},
}
Towards Satellite Image Road Graph Extraction: A Global-Scale Dataset and A Novel Method: Pan Yin,

Kaiyu Li,

Xiangyong Cao,

Jing Yao,

Lei Liu,

Xueru Bai,

Feng Zhou,

Deyu Meng; [pdf] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Pan and Li, Kaiyu and Cao, Xiangyong and Yao, Jing and Liu, Lei and Bai, Xueru and Zhou, Feng and Meng, Deyu},
  title     = {Towards Satellite Image Road Graph Extraction: A Global-Scale Dataset and A Novel Method},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1527--1537},
}
SuperLightNet: Lightweight Parameter Aggregation Network for Multimodal Brain Tumor Segmentation: Feng Yu,

Jiacheng Cao,

Li Liu,

Minghua Jiang; [pdf]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Feng and Cao, Jiacheng and Liu, Li and Jiang, Minghua},
  title     = {{SuperLightNet}: Lightweight Parameter Aggregation Network for Multimodal Brain Tumor Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5197--5206},
}
Self-Supervised Spatial Correspondence Across Modalities: Ayush Shrivastava,

Andrew Owens; [pdf] [arXiv]
[bibtex]
@InProceedings{Shrivastava_2025_CVPR,
  author    = {Shrivastava, Ayush and Owens, Andrew},
  title     = {Self-Supervised Spatial Correspondence Across Modalities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6383--6393},
}
MOS-Attack: A Scalable Multi-objective Adversarial Attack Framework: Ping Guo,

Cheng Gong,

Xi Lin,

Fei Liu,

Zhichao Lu,

Qingfu Zhang,

Zhenkun Wang; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Ping and Gong, Cheng and Lin, Xi and Liu, Fei and Lu, Zhichao and Zhang, Qingfu and Wang, Zhenkun},
  title     = {{MOS-Attack}: A Scalable Multi-objective Adversarial Attack Framework},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5041--5051},
}
Motion Modes: What Could Happen Next?: Karran Pandey,

Yannick Hold-Geoffroy,

Matheus Gadelha,

Niloy J. Mitra,

Karan Singh,

Paul Guerrero; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pandey_2025_CVPR,
  author    = {Pandey, Karran and Hold-Geoffroy, Yannick and Gadelha, Matheus and Mitra, Niloy J. and Singh, Karan and Guerrero, Paul},
  title     = {Motion Modes: What Could Happen Next?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2030--2039},
}
Finer-CAM: Spotting the Difference Reveals Finer Details for Visual Explanation: Ziheng Zhang,

Jianyang Gu,

Arpita Chowdhury,

Zheda Mai,

David Carlyn,

Tanya Berger-Wolf,

Yu Su,

Wei-Lun Chao; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Ziheng and Gu, Jianyang and Chowdhury, Arpita and Mai, Zheda and Carlyn, David and Berger-Wolf, Tanya and Su, Yu and Chao, Wei-Lun},
  title     = {{Finer-CAM}: Spotting the Difference Reveals Finer Details for Visual Explanation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9611--9620},
}
The Change You Want To Detect: Semantic Change Detection In Earth Observation With Hybrid Data Generation: Yanis Benidir,

Nicolas Gonthier,

Clement Mallet; [pdf] [supp]
[bibtex]
@InProceedings{Benidir_2025_CVPR,
  author    = {Benidir, Yanis and Gonthier, Nicolas and Mallet, Clement},
  title     = {The Change You Want To Detect: Semantic Change Detection In {Earth} Observation With Hybrid Data Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2204--2214},
}
Weakly Supervised Semantic Segmentation via Progressive Confidence Region Expansion: Xiangfeng Xu,

Pinyi Zhang,

Wenxuan Huang,

Yunhang Shen,

Haosheng Chen,

Jingzhong Lin,

Wei Li,

Gaoqi He,

Jiao Xie,

Shaohui Lin; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Xiangfeng and Zhang, Pinyi and Huang, Wenxuan and Shen, Yunhang and Chen, Haosheng and Lin, Jingzhong and Li, Wei and He, Gaoqi and Xie, Jiao and Lin, Shaohui},
  title     = {Weakly Supervised Semantic Segmentation via Progressive Confidence Region Expansion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9829--9838},
}
RELOCATE: A Simple Training-Free Baseline for Visual Query Localization Using Region-Based Representations: Savya Khosla,

Sethuraman T V,

Alexander Schwing,

Derek Hoiem; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Khosla_2025_CVPR,
    author    = {Khosla, Savya and V, Sethuraman T and Schwing, Alexander and Hoiem, Derek},
    title     = {{RELOCATE}: A Simple Training-Free Baseline for Visual Query Localization Using Region-Based Representations},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3697--3706},
}
HOTFormerLoc: Hierarchical Octree Transformer for Versatile Lidar Place Recognition Across Ground and Aerial Views: Ethan Griffiths,

Maryam Haghighat,

Simon Denman,

Clinton Fookes,

Milad Ramezani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Griffiths_2025_CVPR,
    author    = {Griffiths, Ethan and Haghighat, Maryam and Denman, Simon and Fookes, Clinton and Ramezani, Milad},
    title     = {{HOTFormerLoc}: Hierarchical Octree Transformer for Versatile Lidar Place Recognition Across Ground and Aerial Views},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6648--6658},
}
UniK3D: Universal Camera Monocular 3D Estimation: Luigi Piccinelli,

Christos Sakaridis,

Mattia Segu,

Yung-Hsu Yang,

Siyuan Li,

Wim Abbeloos,

Luc Van Gool; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Piccinelli_2025_CVPR,
    author    = {Piccinelli, Luigi and Sakaridis, Christos and Segu, Mattia and Yang, Yung-Hsu and Li, Siyuan and Abbeloos, Wim and Van Gool, Luc},
    title     = {{UniK3D}: Universal Camera Monocular {3D} Estimation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1028--1039},
}
ConMo: Controllable Motion Disentanglement and Recomposition for Zero-Shot Motion Transfer: Jiayi Gao,

Zijin Yin,

Changcheng Hua,

Yuxin Peng,

Kongming Liang,

Zhanyu Ma,

Jun Guo,

Yang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
    author    = {Gao, Jiayi and Yin, Zijin and Hua, Changcheng and Peng, Yuxin and Liang, Kongming and Ma, Zhanyu and Guo, Jun and Liu, Yang},
    title     = {{ConMo}: Controllable Motion Disentanglement and Recomposition for Zero-Shot Motion Transfer},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {7191--7200},
}
AG-VPReID: A Challenging Large-Scale Benchmark for Aerial-Ground Video-based Person Re-Identification: Huy Nguyen,

Kien Nguyen,

Akila Pemasiri,

Feng Liu,

Sridha Sridharan,

Clinton Fookes; [pdf] [supp]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
    author    = {Nguyen, Huy and Nguyen, Kien and Pemasiri, Akila and Liu, Feng and Sridharan, Sridha and Fookes, Clinton},
    title     = {{AG-VPReID}: A Challenging Large-Scale Benchmark for Aerial-Ground Video-based Person Re-Identification},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1241--1251},
}
EBS-EKF: Accurate and High Frequency Event-based Star Tracking: Albert W. Reed,

Connor Hashemi,

Dennis Melamed,

Nitesh Menon,

Keigo Hirakawa,

Scott McCloskey; [pdf] [supp]
[bibtex]
@InProceedings{Reed_2025_CVPR,
    author    = {Reed, Albert W. and Hashemi, Connor and Melamed, Dennis and Menon, Nitesh and Hirakawa, Keigo and McCloskey, Scott},
    title     = {{EBS-EKF}: Accurate and High Frequency Event-based Star Tracking},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6510--6519},
}
Benchmarking Object Detectors under Real-World Distribution Shifts in Satellite Imagery: Sara A. Al-Emadi,

Yin Yang,

Ferda Ofli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Al-Emadi_2025_CVPR,
    author    = {Al-Emadi, Sara A. and Yang, Yin and Ofli, Ferda},
    title     = {Benchmarking Object Detectors under Real-World Distribution Shifts in Satellite Imagery},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8299--8309},
}
SAIST: Segment Any Infrared Small Target Model Guided by Contrastive Language-Image Pretraining: Mingjin Zhang,

Xiaolong Li,

Fei Gao,

Jie Guo,

Xinbo Gao,

Jing Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Mingjin and Li, Xiaolong and Gao, Fei and Guo, Jie and Gao, Xinbo and Zhang, Jing},
    title     = {{SAIST}: Segment Any Infrared Small Target Model Guided by Contrastive Language-Image Pretraining},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9549--9558},
}
Closed-Loop Supervised Fine-Tuning of Tokenized Traffic Models: Zhejun Zhang,

Peter Karkus,

Maximilian Igl,

Wenhao Ding,

Yuxiao Chen,

Boris Ivanovic,

Marco Pavone; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Zhejun and Karkus, Peter and Igl, Maximilian and Ding, Wenhao and Chen, Yuxiao and Ivanovic, Boris and Pavone, Marco},
    title     = {Closed-Loop Supervised Fine-Tuning of Tokenized Traffic Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5422--5432},
}
CoLLM: A Large Language Model for Composed Image Retrieval: Chuong Huynh,

Jinyu Yang,

Ashish Tawari,

Mubarak Shah,

Son Tran,

Raffay Hamid,

Trishul Chilimbi,

Abhinav Shrivastava; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huynh_2025_CVPR,
    author    = {Huynh, Chuong and Yang, Jinyu and Tawari, Ashish and Shah, Mubarak and Tran, Son and Hamid, Raffay and Chilimbi, Trishul and Shrivastava, Abhinav},
    title     = {{CoLLM}: A Large Language Model for Composed Image Retrieval},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3994--4004},
}
GraphMimic: Graph-to-Graphs Generative Modeling from Videos for Policy Learning: Guangyan Chen,

Te Cui,

Meiling Wang,

Chengcai Yang,

Mengxiao Hu,

Haoyang Lu,

Yao Mu,

Zicai Peng,

Tianxing Zhou,

Xinran Jiang,

Yi Yang,

Yufeng Yue; [pdf]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Guangyan and Cui, Te and Wang, Meiling and Yang, Chengcai and Hu, Mengxiao and Lu, Haoyang and Mu, Yao and Peng, Zicai and Zhou, Tianxing and Jiang, Xinran and Yang, Yi and Yue, Yufeng},
    title     = {{GraphMimic}: Graph-to-Graphs Generative Modeling from Videos for Policy Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1756--1768},
}
MMVU: Measuring Expert-Level Multi-Discipline Video Understanding: Yilun Zhao,

Haowei Zhang,

Lujing Xie,

Tongyan Hu,

Guo Gan,

Yitao Long,

Zhiyuan Hu,

Weiyuan Chen,

Chuhan Li,

Zhijian Xu,

Chengye Wang,

Ziyao Shangguan,

Zhenwen Liang,

Yixin Liu,

Chen Zhao,

Arman Cohan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
    author    = {Zhao, Yilun and Zhang, Haowei and Xie, Lujing and Hu, Tongyan and Gan, Guo and Long, Yitao and Hu, Zhiyuan and Chen, Weiyuan and Li, Chuhan and Xu, Zhijian and Wang, Chengye and Shangguan, Ziyao and Liang, Zhenwen and Liu, Yixin and Zhao, Chen and Cohan, Arman},
    title     = {{MMVU}: Measuring Expert-Level Multi-Discipline Video Understanding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8475--8489},
}
EgoLM: Multi-Modal Language Model of Egocentric Motions: Fangzhou Hong,

Vladimir Guzov,

Hyo Jin Kim,

Yuting Ye,

Richard Newcombe,

Ziwei Liu,

Lingni Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2025_CVPR,
    author    = {Hong, Fangzhou and Guzov, Vladimir and Kim, Hyo Jin and Ye, Yuting and Newcombe, Richard and Liu, Ziwei and Ma, Lingni},
    title     = {{EgoLM}: Multi-Modal Language Model of Egocentric Motions},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5344--5354},
}
Long Video Diffusion Generation with Segmented Cross-Attention and Content-Rich Video Data Curation: Xin Yan,

Yuxuan Cai,

Qiuyue Wang,

Yuan Zhou,

Wenhao Huang,

Huan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
    author    = {Yan, Xin and Cai, Yuxuan and Wang, Qiuyue and Zhou, Yuan and Huang, Wenhao and Yang, Huan},
    title     = {Long Video Diffusion Generation with Segmented Cross-Attention and Content-Rich Video Data Curation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3184--3194},
}
Learning Heterogeneous Tissues with Mixture of Experts for Gigapixel Whole Slide Images: Junxian Wu,

Minheng Chen,

Xinyi Ke,

Tianwang Xun,

Xiaoming Jiang,

Hongyu Zhou,

Lizhi Shao,

Youyong Kong; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
    author    = {Wu, Junxian and Chen, Minheng and Ke, Xinyi and Xun, Tianwang and Jiang, Xiaoming and Zhou, Hongyu and Shao, Lizhi and Kong, Youyong},
    title     = {Learning Heterogeneous Tissues with Mixture of Experts for Gigapixel Whole Slide Images},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5144--5153},
}
Disentangled Pose and Appearance Guidance for Multi-Pose Generation: Tengfei Xiao,

Yue Wu,

Yuelong Li,

Can Qin,

Maoguo Gong,

Qiguang Miao,

Wenping Ma; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
    author    = {Xiao, Tengfei and Wu, Yue and Li, Yuelong and Qin, Can and Gong, Maoguo and Miao, Qiguang and Ma, Wenping},
    title     = {Disentangled Pose and Appearance Guidance for Multi-Pose Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5646--5655},
}
Mind the Gap: Confidence Discrepancy Can Guide Federated Semi-Supervised Learning Across Pseudo-Mismatch: Yijie Liu,

Xinyi Shang,

Yiqun Zhang,

Yang Lu,

Chen Gong,

Jing-Hao Xue,

Hanzi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Yijie and Shang, Xinyi and Zhang, Yiqun and Lu, Yang and Gong, Chen and Xue, Jing-Hao and Wang, Hanzi},
    title     = {Mind the Gap: Confidence Discrepancy Can Guide Federated Semi-Supervised Learning Across Pseudo-Mismatch},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {10173--10182},
}
Electromyography-Informed Facial Expression Reconstruction for Physiological-Based Synthesis and Analysis: Tim Büchner,

Christoph Anders,

Orlando Guntinas-Lichius,

Joachim Denzler; [pdf] [supp]
[bibtex]
@InProceedings{Buchner_2025_CVPR,
    author    = {B{\"u}chner, Tim and Anders, Christoph and Guntinas-Lichius, Orlando and Denzler, Joachim},
    title     = {Electromyography-Informed Facial Expression Reconstruction for Physiological-Based Synthesis and Analysis},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {215--227},
}
Improving the Transferability of Adversarial Attacks on Face Recognition with Diverse Parameters Augmentation: Fengfan Zhou,

Bangjie Yin,

Hefei Ling,

Qianyu Zhou,

Wenxuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
    author    = {Zhou, Fengfan and Yin, Bangjie and Ling, Hefei and Zhou, Qianyu and Wang, Wenxuan},
    title     = {Improving the Transferability of Adversarial Attacks on Face Recognition with Diverse Parameters Augmentation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3516--3527},
}
Adapting to Observation Length of Trajectory Prediction via Contrastive Learning: Ruiqi Qiu,

Jun Gong,

Xinyu Zhang,

Siqi Luo,

Bowen Zhang,

Yi Cen; [pdf]
[bibtex]
@InProceedings{Qiu_2025_CVPR,
    author    = {Qiu, Ruiqi and Gong, Jun and Zhang, Xinyu and Luo, Siqi and Zhang, Bowen and Cen, Yi},
    title     = {Adapting to Observation Length of Trajectory Prediction via Contrastive Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1645--1654},
}
Fine-Grained Image-Text Correspondence with Cost Aggregation for Open-Vocabulary Part Segmentation: Jiho Choi,

Seonho Lee,

Minhyun Lee,

Seungho Lee,

Hyunjung Shim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2025_CVPR,
    author    = {Choi, Jiho and Lee, Seonho and Lee, Minhyun and Lee, Seungho and Shim, Hyunjung},
    title     = {Fine-Grained Image-Text Correspondence with Cost Aggregation for Open-Vocabulary Part Segmentation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9782--9793},
}
NitroFusion: High-Fidelity Single-Step Diffusion through Dynamic Adversarial Training: Dar-Yen Chen,

Hmrishav Bandyopadhyay,

Kai Zou,

Yi-Zhe Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Dar-Yen and Bandyopadhyay, Hmrishav and Zou, Kai and Song, Yi-Zhe},
    title     = {{NitroFusion}: High-Fidelity Single-Step Diffusion through Dynamic Adversarial Training},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {7654--7663},
}
CMMLoc: Advancing Text-to-PointCloud Localization with Cauchy-Mixture-Model Based Framework: Yanlong Xu,

Haoxuan Qu,

Jun Liu,

Wenxiao Zhang,

Xun Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
    author    = {Xu, Yanlong and Qu, Haoxuan and Liu, Jun and Zhang, Wenxiao and Yang, Xun},
    title     = {{CMMLoc}: Advancing {Text-to-PointCloud} Localization with {Cauchy-Mixture-Model} Based Framework},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6637--6647},
}
RC-AutoCalib: An End-to-End Radar-Camera Automatic Calibration Network: Van-Tin Luu,

Yon-Lin Cai,

Vu-Hoang Tran,

Wei-Chen Chiu,

Yi-Ting Chen,

Ching-Chun Huang; [pdf] [supp]
[bibtex]
@InProceedings{Luu_2025_CVPR,
    author    = {Luu, Van-Tin and Cai, Yon-Lin and Tran, Vu-Hoang and Chiu, Wei-Chen and Chen, Yi-Ting and Huang, Ching-Chun},
    title     = {{RC-AutoCalib}: An End-to-End Radar-Camera Automatic Calibration Network},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6700--6709},
}
Argus: A Compact and Versatile Foundation Model for Vision: Weiming Zhuang,

Chen Chen,

Zhizhong Li,

Sina Sajadmanesh,

Jingtao Li,

Jiabo Huang,

Vikash Sehwag,

Vivek Sharma,

Hirotaka Shinozaki,

Felan Carlo Garcia,

Yihao Zhan,

Naohiro Adachi,

Ryoji Eki,

Michael Spranger,

Peter Stone,

Lingjuan Lyu; [pdf] [supp]
[bibtex]
@InProceedings{Zhuang_2025_CVPR,
    author    = {Zhuang, Weiming and Chen, Chen and Li, Zhizhong and Sajadmanesh, Sina and Li, Jingtao and Huang, Jiabo and Sehwag, Vikash and Sharma, Vivek and Shinozaki, Hirotaka and Garcia, Felan Carlo and Zhan, Yihao and Adachi, Naohiro and Eki, Ryoji and Spranger, Michael and Stone, Peter and Lyu, Lingjuan},
    title     = {Argus: A Compact and Versatile Foundation Model for Vision},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {4418--4429},
}
Sampling Innovation-Based Adaptive Compressive Sensing: Zhifu Tian,

Tao Hu,

Chaoyang Niu,

Di Wu,

Shu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_CVPR,
    author    = {Tian, Zhifu and Hu, Tao and Niu, Chaoyang and Wu, Di and Wang, Shu},
    title     = {Sampling Innovation-Based Adaptive Compressive Sensing},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2387--2397},
}
MotionBench: Benchmarking and Improving Fine-grained Video Motion Understanding for Vision Language Models: Wenyi Hong,

Yean Cheng,

Zhuoyi Yang,

Weihan Wang,

Lefan Wang,

Xiaotao Gu,

Shiyu Huang,

Yuxiao Dong,

Jie Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2025_CVPR,
    author    = {Hong, Wenyi and Cheng, Yean and Yang, Zhuoyi and Wang, Weihan and Wang, Lefan and Gu, Xiaotao and Huang, Shiyu and Dong, Yuxiao and Tang, Jie},
    title     = {{MotionBench}: Benchmarking and Improving Fine-grained Video Motion Understanding for Vision Language Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8450--8460},
}
ART: Anonymous Region Transformer for Variable Multi-Layer Transparent Image Generation: Yifan Pu,

Yiming Zhao,

Zhicong Tang,

Ruihong Yin,

Haoxing Ye,

Yuhui Yuan,

Dong Chen,

Jianmin Bao,

Sirui Zhang,

Yanbin Wang,

Lin Liang,

Lijuan Wang,

Ji Li,

Xiu Li,

Zhouhui Lian,

Gao Huang,

Baining Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pu_2025_CVPR,
    author    = {Pu, Yifan and Zhao, Yiming and Tang, Zhicong and Yin, Ruihong and Ye, Haoxing and Yuan, Yuhui and Chen, Dong and Bao, Jianmin and Zhang, Sirui and Wang, Yanbin and Liang, Lin and Wang, Lijuan and Li, Ji and Li, Xiu and Lian, Zhouhui and Huang, Gao and Guo, Baining},
    title     = {{ART}: Anonymous Region Transformer for Variable Multi-Layer Transparent Image Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {7952--7962},
}
ArcPro: Architectural Programs for Structured 3D Abstraction of Sparse Points: Qirui Huang,

Runze Zhang,

Kangjun Liu,

Minglun Gong,

Hao Zhang,

Hui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
    author    = {Huang, Qirui and Zhang, Runze and Liu, Kangjun and Gong, Minglun and Zhang, Hao and Huang, Hui},
    title     = {{ArcPro}: Architectural Programs for Structured {3D} Abstraction of Sparse Points},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6563--6572},
}
Hardware-Rasterized Ray-Based Gaussian Splatting: Samuel Rota Bulò,

Nemanja Bartolovic,

Lorenzo Porzi,

Peter Kontschieder; [pdf] [supp]
[bibtex]
@InProceedings{Bulo_2025_CVPR,
    author    = {Rota Bul{\`o}, Samuel and Bartolovic, Nemanja and Porzi, Lorenzo and Kontschieder, Peter},
    title     = {Hardware-Rasterized Ray-Based {Gaussian} Splatting},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {485--494},
}
FreqDebias: Towards Generalizable Deepfake Detection via Consistency-Driven Frequency Debiasing: Hossein Kashiani,

Niloufar Alipour Talemi,

Fatemeh Afghah; [pdf] [supp]
[bibtex]
@InProceedings{Kashiani_2025_CVPR,
    author    = {Kashiani, Hossein and Talemi, Niloufar Alipour and Afghah, Fatemeh},
    title     = {{FreqDebias}: Towards Generalizable Deepfake Detection via Consistency-Driven Frequency Debiasing},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8775--8785},
}
Multi-subject Open-set Personalization in Video Generation: Tsai-Shien Chen,

Aliaksandr Siarohin,

Willi Menapace,

Yuwei Fang,

Kwot Sin Lee,

Ivan Skorokhodov,

Kfir Aberman,

Jun-Yan Zhu,

Ming-Hsuan Yang,

Sergey Tulyakov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Tsai-Shien and Siarohin, Aliaksandr and Menapace, Willi and Fang, Yuwei and Lee, Kwot Sin and Skorokhodov, Ivan and Aberman, Kfir and Zhu, Jun-Yan and Yang, Ming-Hsuan and Tulyakov, Sergey},
    title     = {Multi-subject Open-set Personalization in Video Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6099--6110},
}
Wav2Sem: Plug-and-Play Audio Semantic Decoupling for 3D Speech-Driven Facial Animation: Hao Li,

Ju Dai,

Xin Zhao,

Feng Zhou,

Junjun Pan,

Lei Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Hao and Dai, Ju and Zhao, Xin and Zhou, Feng and Pan, Junjun and Li, Lei},
    title     = {{Wav2Sem}: Plug-and-Play Audio Semantic Decoupling for {3D} Speech-Driven Facial Animation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {183--192},
}
FG^2: Fine-Grained Cross-View Localization by Fine-Grained Feature Matching: Zimin Xia,

Alexandre Alahi; [pdf] [supp]
[bibtex]
@InProceedings{Xia_2025_CVPR,
    author    = {Xia, Zimin and Alahi, Alexandre},
    title     = {{FG$^2$}: Fine-Grained Cross-View Localization by Fine-Grained Feature Matching},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6362--6372},
}
Distilled Prompt Learning for Incomplete Multimodal Survival Prediction: Yingxue Xu,

Fengtao Zhou,

Chenyu Zhao,

Yihui Wang,

Can Yang,

Hao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
    author    = {Xu, Yingxue and Zhou, Fengtao and Zhao, Chenyu and Wang, Yihui and Yang, Can and Chen, Hao},
    title     = {Distilled Prompt Learning for Incomplete Multimodal Survival Prediction},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5102--5111},
}
Learning Conditional Space-Time Prompt Distributions for Video Class-Incremental Learning: Xiaohan Zou,

Wenchao Ma,

Shu Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Zou_2025_CVPR,
    author    = {Zou, Xiaohan and Ma, Wenchao and Zhao, Shu},
    title     = {Learning Conditional Space-Time Prompt Distributions for Video Class-Incremental Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {4862--4873},
}
Hyperbolic Safety-Aware Vision-Language Models: Tobia Poppi,

Tejaswi Kasarla,

Pascal Mettes,

Lorenzo Baraldi,

Rita Cucchiara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Poppi_2025_CVPR,
    author    = {Poppi, Tobia and Kasarla, Tejaswi and Mettes, Pascal and Baraldi, Lorenzo and Cucchiara, Rita},
    title     = {Hyperbolic Safety-Aware Vision-Language Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {4222--4232},
}
SinGS: Animatable Single-Image Human Gaussian Splats with Kinematic Priors: Yufan Wu,

Xuanhong Chen,

Wen Li,

Shunran Jia,

Hualiang Wei,

Kairui Feng,

Jialiang Chen,

Yuhan Li,

Ang He,

Weimin Zhang,

Bingbing Ni,

Wenjun Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
    author    = {Wu, Yufan and Chen, Xuanhong and Li, Wen and Jia, Shunran and Wei, Hualiang and Feng, Kairui and Chen, Jialiang and Li, Yuhan and He, Ang and Zhang, Weimin and Ni, Bingbing and Zhang, Wenjun},
    title     = {{SinGS}: Animatable Single-Image Human {Gaussian} Splats with Kinematic Priors},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5571--5580},
}
Multi-Modal Synergistic Implicit Image Enhancement for Efficient Optical Flow Estimation: Weichen Dai,

Hexing Wu,

Xiaoyang Weng,

Yuxin Zheng,

Yuhang Ming,

Wanzeng Kong; [pdf]
[bibtex]
@InProceedings{Dai_2025_CVPR,
    author    = {Dai, Weichen and Wu, Hexing and Weng, Xiaoyang and Zheng, Yuxin and Ming, Yuhang and Kong, Wanzeng},
    title     = {Multi-Modal Synergistic Implicit Image Enhancement for Efficient Optical Flow Estimation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2173--2182},
}
Generating Multimodal Driving Scenes via Next-Scene Prediction: Yanhao Wu,

Haoyang Zhang,

Tianwei Lin,

Lichao Huang,

Shujie Luo,

Rui Wu,

Congpei Qiu,

Wei Ke,

Tong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
    author    = {Wu, Yanhao and Zhang, Haoyang and Lin, Tianwei and Huang, Lichao and Luo, Shujie and Wu, Rui and Qiu, Congpei and Ke, Wei and Zhang, Tong},
    title     = {Generating Multimodal Driving Scenes via Next-Scene Prediction},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6844--6853},
}
Symmetry Strikes Back: From Single-Image Symmetry Detection to 3D Generation: Xiang Li,

Zixuan Huang,

Anh Thai,

James M. Rehg; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Xiang and Huang, Zixuan and Thai, Anh and Rehg, James M.},
    title     = {Symmetry Strikes Back: From Single-Image Symmetry Detection to {3D} Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {743--752},
}
PosterMaker: Towards High-Quality Product Poster Generation with Accurate Text Rendering: Yifan Gao,

Zihang Lin,

Chuanbin Liu,

Min Zhou,

Tiezheng Ge,

Bo Zheng,

Hongtao Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
    author    = {Gao, Yifan and Lin, Zihang and Liu, Chuanbin and Zhou, Min and Ge, Tiezheng and Zheng, Bo and Xie, Hongtao},
    title     = {{PosterMaker}: Towards High-Quality Product Poster Generation with Accurate Text Rendering},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8083--8093},
}
Rethinking Personalized Aesthetics Assessment: Employing Physique Aesthetics Assessment as An Exemplification: Haobin Zhong,

Shuai He,

Anlong Ming,

Huadong Ma; [pdf] [supp]
[bibtex]
@InProceedings{Zhong_2025_CVPR,
    author    = {Zhong, Haobin and He, Shuai and Ming, Anlong and Ma, Huadong},
    title     = {Rethinking Personalized Aesthetics Assessment: Employing Physique Aesthetics Assessment as An Exemplification},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2935--2944},
}
You See it, You Got it: Learning 3D Creation on Pose-Free Videos at Scale: Baorui Ma,

Huachen Gao,

Haoge Deng,

Zhengxiong Luo,

Tiejun Huang,

Lulu Tang,

Xinlong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
    author    = {Ma, Baorui and Gao, Huachen and Deng, Haoge and Luo, Zhengxiong and Huang, Tiejun and Tang, Lulu and Wang, Xinlong},
    title     = {You See it, You Got it: Learning {3D} Creation on Pose-Free Videos at Scale},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2016--2029},
}
PEACE: Empowering Geologic Map Holistic Understanding with MLLMs: Yangyu Huang,

Tianyi Gao,

Haoran Xu,

Qihao Zhao,

Yang Song,

Zhipeng Gui,

Tengchao Lv,

Hao Chen,

Lei Cui,

Scarlett Li,

Furu Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
    author    = {Huang, Yangyu and Gao, Tianyi and Xu, Haoran and Zhao, Qihao and Song, Yang and Gui, Zhipeng and Lv, Tengchao and Chen, Hao and Cui, Lei and Li, Scarlett and Wei, Furu},
    title     = {{PEACE}: Empowering Geologic Map Holistic Understanding with {MLLMs}},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3899--3908},
}
ConceptGuard: Continual Personalized Text-to-Image Generation with Forgetting and Confusion Mitigation: Zirun Guo,

Tao Jin; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
    author    = {Guo, Zirun and Jin, Tao},
    title     = {{ConceptGuard}: Continual Personalized Text-to-Image Generation with Forgetting and Confusion Mitigation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2945--2954},
}
MARVEL-40M+: Multi-Level Visual Elaboration for High-Fidelity Text-to-3D Content Creation: Sankalp Sinha,

Mohammad Sadil Khan,

Muhammad Usama,

Shino Sam,

Didier Stricker,

Sk Aziz Ali,

Muhammad Zeshan Afzal; [pdf] [supp]
[bibtex]
@InProceedings{Sinha_2025_CVPR,
    author    = {Sinha, Sankalp and Khan, Mohammad Sadil and Usama, Muhammad and Sam, Shino and Stricker, Didier and Ali, Sk Aziz and Afzal, Muhammad Zeshan},
    title     = {{MARVEL-40M+}: Multi-Level Visual Elaboration for High-Fidelity {Text-to-3D} Content Creation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8105--8116},
}
ERUPT: Efficient Rendering with Unposed Patch Transformer: Maxim V. Shugaev,

Vincent Chen,

Maxim Karrenbach,

Kyle Ashley,

Bridget Kennedy,

Naresh P. Cuntoor; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shugaev_2025_CVPR,
    author    = {Shugaev, Maxim V. and Chen, Vincent and Karrenbach, Maxim and Ashley, Kyle and Kennedy, Bridget and Cuntoor, Naresh P.},
    title     = {{ERUPT}: Efficient Rendering with Unposed Patch Transformer},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6057--6067},
}
Rethinking End-to-End 2D to 3D Scene Segmentation in Gaussian Splatting: Runsong Zhu,

Shi Qiu,

Zhengzhe Liu,

Ka-Hei Hui,

Qianyi Wu,

Pheng-Ann Heng,

Chi-Wing Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
    author    = {Zhu, Runsong and Qiu, Shi and Liu, Zhengzhe and Hui, Ka-Hei and Wu, Qianyi and Heng, Pheng-Ann and Fu, Chi-Wing},
    title     = {Rethinking End-to-End {2D} to {3D} Scene Segmentation in {Gaussian} Splatting},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3656--3665},
}
Quad-Pixel Image Defocus Deblurring: A New Benchmark and Model: Hang Chen,

Yin Xie,

Xiaoxiu Peng,

Lihu Sun,

Wenkai Su,

Xiaodong Yang,

Chengming Liu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Hang and Xie, Yin and Peng, Xiaoxiu and Sun, Lihu and Su, Wenkai and Yang, Xiaodong and Liu, Chengming},
    title     = {Quad-Pixel Image Defocus Deblurring: A New Benchmark and Model},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5709--5719},
}
GEAL: Generalizable 3D Affordance Learning with Cross-Modal Consistency: Dongyue Lu,

Lingdong Kong,

Tianxin Huang,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
    author    = {Lu, Dongyue and Kong, Lingdong and Huang, Tianxin and Lee, Gim Hee},
    title     = {{GEAL}: Generalizable {3D} Affordance Learning with Cross-Modal Consistency},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1680--1690},
}
Dynamic Derivation and Elimination: Audio Visual Segmentation with Enhanced Audio Semantics: Chen Liu,

Liying Yang,

Peike Li,

Dadong Wang,

Lincheng Li,

Xin Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Chen and Yang, Liying and Li, Peike and Wang, Dadong and Li, Lincheng and Yu, Xin},
    title     = {Dynamic Derivation and Elimination: Audio Visual Segmentation with Enhanced Audio Semantics},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3131--3141},
}
ESC: Erasing Space Concept for Knowledge Deletion: Tae-Young Lee,

Sundong Park,

Minwoo Jeon,

Hyoseok Hwang,

Gyeong-Moon Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
    author    = {Lee, Tae-Young and Park, Sundong and Jeon, Minwoo and Hwang, Hyoseok and Park, Gyeong-Moon},
    title     = {{ESC}: Erasing Space Concept for Knowledge Deletion},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5010--5019},
}
Temporal Separation with Entropy Regularization for Knowledge Distillation in Spiking Neural Networks: Kairong Yu,

Chengting Yu,

Tianqing Zhang,

Xiaochen Zhao,

Shu Yang,

Hongwei Wang,

Qiang Zhang,

Qi Xu; [pdf] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Kairong and Yu, Chengting and Zhang, Tianqing and Zhao, Xiaochen and Yang, Shu and Wang, Hongwei and Zhang, Qiang and Xu, Qi},
  title     = {Temporal Separation with Entropy Regularization for Knowledge Distillation in Spiking Neural Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8806--8816},
}
Theoretical Insights in Model Inversion Robustness and Conditional Entropy Maximization for Collaborative Inference Systems: Song Xia,

Yi Yu,

Wenhan Yang,

Meiwen Ding,

Zhuo Chen,

Ling-Yu Duan,

Alex C. Kot,

Xudong Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_CVPR,
  author    = {Xia, Song and Yu, Yi and Yang, Wenhan and Ding, Meiwen and Chen, Zhuo and Duan, Ling-Yu and Kot, Alex C. and Jiang, Xudong},
  title     = {Theoretical Insights in Model Inversion Robustness and Conditional Entropy Maximization for Collaborative Inference Systems},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8753--8763},
}
Flowing from Words to Pixels: A Noise-Free Framework for Cross-Modality Evolution: Qihao Liu,

Xi Yin,

Alan Yuille,

Andrew Brown,

Mannat Singh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Qihao and Yin, Xi and Yuille, Alan and Brown, Andrew and Singh, Mannat},
  title     = {Flowing from Words to Pixels: A Noise-Free Framework for Cross-Modality Evolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2755--2765},
}
Interpretable Generative Models through Post-hoc Concept Bottlenecks: Akshay Kulkarni,

Ge Yan,

Chung-En Sun,

Tuomas Oikarinen,

Tsui-Wei Weng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kulkarni_2025_CVPR,
  author    = {Kulkarni, Akshay and Yan, Ge and Sun, Chung-En and Oikarinen, Tuomas and Weng, Tsui-Wei},
  title     = {Interpretable Generative Models through Post-hoc Concept Bottlenecks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8162--8171},
}
Watermarking One for All: A Robust Watermarking Scheme Against Partial Image Theft: Gaozhi Liu,

Silu Cao,

Zhenxing Qian,

Xinpeng Zhang,

Sheng Li,

Wanli Peng; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Gaozhi and Cao, Silu and Qian, Zhenxing and Zhang, Xinpeng and Li, Sheng and Peng, Wanli},
  title     = {Watermarking One for All: A Robust Watermarking Scheme Against Partial Image Theft},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8225--8234},
}
Minding Fuzzy Regions: A Data-driven Alternating Learning Paradigm for Stable Lesion Segmentation: Lexin Fang,

Yunyang Xu,

Xiang Ma,

Xuemei Li,

Caiming Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Lexin and Xu, Yunyang and Ma, Xiang and Li, Xuemei and Zhang, Caiming},
  title     = {Minding Fuzzy Regions: A Data-driven Alternating Learning Paradigm for Stable Lesion Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10425--10434},
}
IM-Zero: Instance-level Motion Controllable Video Generation in a Zero-shot Manner: Yuyang Huang,

Yabo Chen,

Li Ding,

Xiaopeng Zhang,

Wenrui Dai,

Junni Zou,

Hongkai Xiong,

Qi Tian; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Yuyang and Chen, Yabo and Ding, Li and Zhang, Xiaopeng and Dai, Wenrui and Zou, Junni and Xiong, Hongkai and Tian, Qi},
  title     = {{IM-Zero}: Instance-level Motion Controllable Video Generation in a Zero-shot Manner},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7265--7275},
}
Link-based Contrastive Learning for One-Shot Unsupervised Domain Adaptation: Yue Zhang,

Mingyue Bin,

Yuyang Zhang,

Zhongyuan Wang,

Zhen Han,

Chao Liang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yue and Bin, Mingyue and Zhang, Yuyang and Wang, Zhongyuan and Han, Zhen and Liang, Chao},
  title     = {Link-based Contrastive Learning for One-Shot Unsupervised Domain Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4916--4926},
}
UniMamba: Unified Spatial-Channel Representation Learning with Group-Efficient Mamba for LiDAR-based 3D Object Detection: Xin Jin,

Haisheng Su,

Kai Liu,

Cong Ma,

Wei Wu,

Fei HUI,

Junchi Yan; [pdf] [arXiv]
[bibtex]
@InProceedings{Jin_2025_CVPR,
  author    = {Jin, Xin and Su, Haisheng and Liu, Kai and Ma, Cong and Wu, Wei and HUI, Fei and Yan, Junchi},
  title     = {{UniMamba}: Unified Spatial-Channel Representation Learning with Group-Efficient {Mamba} for {LiDAR}-based {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1407--1417},
}
Rethinking the Adversarial Robustness of Multi-Exit Neural Networks in an Attack-Defense Game: Keyizhi Xu,

Chi Zhang,

Zhan Chen,

Zhongyuan Wang,

Chunxia Xiao,

Chao Liang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Keyizhi and Zhang, Chi and Chen, Zhan and Wang, Zhongyuan and Xiao, Chunxia and Liang, Chao},
  title     = {Rethinking the Adversarial Robustness of Multi-Exit Neural Networks in an Attack-Defense Game},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10265--10274},
}
VideoGuide: Improving Video Diffusion Models without Training Through a Teacher's Guide: Dohun Lee,

Bryan Sangwoo Kim,

Geon Yeong Park,

Jong Chul Ye; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Dohun and Kim, Bryan Sangwoo and Park, Geon Yeong and Ye, Jong Chul},
  title     = {{VideoGuide}: Improving Video Diffusion Models without Training Through a Teacher's Guide},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2599--2608},
}
OmniStereo: Real-time Omnidireactional Depth Estimation with Multiview Fisheye Cameras: Jiaxi Deng,

Yushen Wang,

Haitao Meng,

Zuoxun Hou,

Yi Chang,

Gang Chen; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Jiaxi and Wang, Yushen and Meng, Haitao and Hou, Zuoxun and Chang, Yi and Chen, Gang},
  title     = {{OmniStereo}: Real-time Omnidireactional Depth Estimation with Multiview Fisheye Cameras},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1003--1012},
}
DroneSplat: 3D Gaussian Splatting for Robust 3D Reconstruction from In-the-Wild Drone Imagery: Jiadong Tang,

Yu Gao,

Dianyi Yang,

Liqi Yan,

Yufeng Yue,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Jiadong and Gao, Yu and Yang, Dianyi and Yan, Liqi and Yue, Yufeng and Yang, Yi},
  title     = {{DroneSplat}: {3D} {Gaussian} Splatting for Robust {3D} Reconstruction from In-the-Wild Drone Imagery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {833--843},
}
SDGOCC: Semantic and Depth-Guided Bird's-Eye View Transformation for 3D Multimodal Occupancy Prediction: ZaiPeng Duan,

ChenXu Dang,

Xuzhong Hu,

Pei An,

Junfeng Ding,

Jie Zhan,

YunBiao Xu,

Jie Ma; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2025_CVPR,
  author    = {Duan, ZaiPeng and Dang, ChenXu and Hu, Xuzhong and An, Pei and Ding, Junfeng and Zhan, Jie and Xu, YunBiao and Ma, Jie},
  title     = {{SDGOCC}: Semantic and Depth-Guided Bird's-Eye View Transformation for {3D} Multimodal Occupancy Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6751--6760},
}
Efficient Video Face Enhancement with Enhanced Spatial-Temporal Consistency: Yutong Wang,

Jiajie Teng,

Jiajiong Cao,

Yuming Li,

Chenguang Ma,

Hongteng Xu,

Dixin Luo; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yutong and Teng, Jiajie and Cao, Jiajiong and Li, Yuming and Ma, Chenguang and Xu, Hongteng and Luo, Dixin},
  title     = {Efficient Video Face Enhancement with Enhanced Spatial-Temporal Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2183--2193},
}
IDProtector: An Adversarial Noise Encoder to Protect Against ID-Preserving Image Generation: Yiren Song,

Pei Yang,

Hai Ci,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_CVPR,
  author    = {Song, Yiren and Yang, Pei and Ci, Hai and Shou, Mike Zheng},
  title     = {{IDProtector}: An Adversarial Noise Encoder to Protect Against {ID}-Preserving Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3019--3028},
}
LoTUS: Large-Scale Machine Unlearning with a Taste of Uncertainty: Christoforos N. Spartalis,

Theodoros Semertzidis,

Efstratios Gavves,

Petros Daras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Spartalis_2025_CVPR,
  author    = {Spartalis, Christoforos N. and Semertzidis, Theodoros and Gavves, Efstratios and Daras, Petros},
  title     = {{LoTUS}: Large-Scale Machine Unlearning with a Taste of Uncertainty},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10046--10055},
}
SleeperMark: Towards Robust Watermark against Fine-Tuning Text-to-image Diffusion Models: Zilan Wang,

Junfeng Guo,

Jiacheng Zhu,

Yiming Li,

Heng Huang,

Muhao Chen,

Zhengzhong Tu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zilan and Guo, Junfeng and Zhu, Jiacheng and Li, Yiming and Huang, Heng and Chen, Muhao and Tu, Zhengzhong},
  title     = {{SleeperMark}: Towards Robust Watermark against Fine-Tuning Text-to-image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8213--8224},
}
Illumination Spectrum Estimation for Multispectral Images via Surface Reflectance Modeling and Spatial-Spectral Feature Generation: Hyejin Oh,

Woo-Shik Kim,

Sangyoon Lee,

YungKyung Park,

Je-Won Kang; [pdf] [supp]
[bibtex]
@InProceedings{Oh_2025_CVPR,
  author    = {Oh, Hyejin and Kim, Woo-Shik and Lee, Sangyoon and Park, YungKyung and Kang, Je-Won},
  title     = {Illumination Spectrum Estimation for Multispectral Images via Surface Reflectance Modeling and Spatial-Spectral Feature Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2215--2225},
}
Towards Explainable and Unprecedented Accuracy in Matching Challenging Finger Crease Patterns: Zhenyu Zhou,

Chengdong Dong,

Ajay Kumar; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Zhenyu and Dong, Chengdong and Kumar, Ajay},
  title     = {Towards Explainable and Unprecedented Accuracy in Matching Challenging Finger Crease Patterns},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6212--6221},
}
Neural Hierarchical Decomposition for Single Image Plant Modeling: Zhihao Liu,

Zhanglin Cheng,

Naoto Yokoya; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhihao and Cheng, Zhanglin and Yokoya, Naoto},
  title     = {Neural Hierarchical Decomposition for Single Image Plant Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {733--742},
}
Dual-Agent Optimization framework for Cross-Domain Few-Shot Segmentation: Zhaoyang Li,

Yuan Wang,

Wangkai Li,

Tianzhu Zhang,

Xiang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zhaoyang and Wang, Yuan and Li, Wangkai and Zhang, Tianzhu and Liu, Xiang},
  title     = {Dual-Agent Optimization framework for Cross-Domain Few-Shot Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9849--9859},
}
SACB-Net: Spatial-awareness Convolutions for Medical Image Registration: Xinxing Cheng,

Tianyang Zhang,

Wenqi Lu,

Qingjie Meng,

Alejandro F. Frangi,

Jinming Duan; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Xinxing and Zhang, Tianyang and Lu, Wenqi and Meng, Qingjie and Frangi, Alejandro F. and Duan, Jinming},
  title     = {{SACB-Net}: Spatial-awareness Convolutions for Medical Image Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5227--5237},
}
Text Embedding is Not All You Need: Attention Control for Text-to-Image Semantic Alignment with Text Self-Attention Maps: Jeeyung Kim,

Erfan Esmaeili,

Qiang Qiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Jeeyung and Esmaeili, Erfan and Qiu, Qiang},
  title     = {Text Embedding is Not All You Need: Attention Control for Text-to-Image Semantic Alignment with Text Self-Attention Maps},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8031--8040},
}
DCEvo: Discriminative Cross-Dimensional Evolutionary Learning for Infrared and Visible Image Fusion: Jinyuan Liu,

Bowei Zhang,

Qingyun Mei,

Xingyuan Li,

Yang Zou,

Zhiying Jiang,

Long Ma,

Risheng Liu,

Xin Fan; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jinyuan and Zhang, Bowei and Mei, Qingyun and Li, Xingyuan and Zou, Yang and Jiang, Zhiying and Ma, Long and Liu, Risheng and Fan, Xin},
  title     = {{DCEvo}: Discriminative Cross-Dimensional Evolutionary Learning for Infrared and Visible Image Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2226--2235},
}
AIpparel: A Multimodal Foundation Model for Digital Garments: Kiyohiro Nakayama,

Jan Ackermann,

Timur Levent Kesdogan,

Yang Zheng,

Maria Korosteleva,

Olga Sorkine-Hornung,

Leonidas J. Guibas,

Guandao Yang,

Gordon Wetzstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nakayama_2025_CVPR,
  author    = {Nakayama, Kiyohiro and Ackermann, Jan and Kesdogan, Timur Levent and Zheng, Yang and Korosteleva, Maria and Sorkine-Hornung, Olga and Guibas, Leonidas J. and Yang, Guandao and Wetzstein, Gordon},
  title     = {{AIpparel}: A Multimodal Foundation Model for Digital Garments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8138--8149},
}
PO3AD: Predicting Point Offsets toward Better 3D Point Cloud Anomaly Detection: Jianan Ye,

Weiguang Zhao,

Xi Yang,

Guangliang Cheng,

Kaizhu Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Jianan and Zhao, Weiguang and Yang, Xi and Cheng, Guangliang and Huang, Kaizhu},
  title     = {{PO3AD}: Predicting Point Offsets toward Better {3D} Point Cloud Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1353--1362},
}
ICT: Image-Object Cross-Level Trusted Intervention for Mitigating Object Hallucination in Large Vision-Language Models: Junzhe Chen,

Tianshu Zhang,

Shiyu Huang,

Yuwei Niu,

Linfeng Zhang,

Lijie Wen,

Xuming Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Junzhe and Zhang, Tianshu and Huang, Shiyu and Niu, Yuwei and Zhang, Linfeng and Wen, Lijie and Hu, Xuming},
  title     = {{ICT}: Image-Object Cross-Level Trusted Intervention for Mitigating Object Hallucination in Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4209--4221},
}
PreciseCam: Precise Camera Control for Text-to-Image Generation: Edurne Bernal-Berdun,

Ana Serrano,

Belen Masia,

Matheus Gadelha,

Yannick Hold-Geoffroy,

Xin Sun,

Diego Gutierrez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bernal-Berdun_2025_CVPR,
  author    = {Bernal-Berdun, Edurne and Serrano, Ana and Masia, Belen and Gadelha, Matheus and Hold-Geoffroy, Yannick and Sun, Xin and Gutierrez, Diego},
  title     = {{PreciseCam}: Precise Camera Control for Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2724--2733},
}
SET: Spectral Enhancement for Tiny Object Detection: Huixin Sun,

Runqi Wang,

Yanjing Li,

Linlin Yang,

Shaohui Lin,

Xianbin Cao,

Baochang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Huixin and Wang, Runqi and Li, Yanjing and Yang, Linlin and Lin, Shaohui and Cao, Xianbin and Zhang, Baochang},
  title     = {{SET}: Spectral Enhancement for Tiny Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4713--4723},
}
Differentiable Inverse Rendering with Interpretable Basis BRDFs: Hoon-Gyu Chung,

Seokjun Choi,

Seung-Hwan Baek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chung_2025_CVPR,
  author    = {Chung, Hoon-Gyu and Choi, Seokjun and Baek, Seung-Hwan},
  title     = {Differentiable Inverse Rendering with Interpretable Basis {BRDFs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {475--484},
}
EquiPose: Exploiting Permutation Equivariance for Relative Camera Pose Estimation: Yuzhen Liu,

Qiulei Dong; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yuzhen and Dong, Qiulei},
  title     = {{EquiPose}: Exploiting Permutation Equivariance for Relative Camera Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1127--1137},
}
Face Forgery Video Detection via Temporal Forgery Cue Unraveling: Zonghui Guo,

Yingjie Liu,

Jie Zhang,

Haiyong Zheng,

Shiguang Shan; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Zonghui and Liu, Yingjie and Zhang, Jie and Zheng, Haiyong and Shan, Shiguang},
  title     = {Face Forgery Video Detection via Temporal Forgery Cue Unraveling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7396--7405},
}
Temporally Consistent Object-Centric Learning by Contrasting Slots: Anna Manasyan,

Maximilian Seitzer,

Filip Radovic,

Georg Martius,

Andrii Zadaianchuk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Manasyan_2025_CVPR,
  author    = {Manasyan, Anna and Seitzer, Maximilian and Radovic, Filip and Martius, Georg and Zadaianchuk, Andrii},
  title     = {Temporally Consistent Object-Centric Learning by Contrasting Slots},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5401--5411},
}
MC^2: Multi-concept Guidance for Customized Multi-concept Generation: Jiaxiu Jiang,

Yabo Zhang,

Kailai Feng,

Xiaohe Wu,

Wenbo Li,

Renjing Pei,

Fan Li,

Wangmeng Zuo; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Jiaxiu and Zhang, Yabo and Feng, Kailai and Wu, Xiaohe and Li, Wenbo and Pei, Renjing and Li, Fan and Zuo, Wangmeng},
  title     = {{MC{\textasciicircum}2}: Multi-concept Guidance for Customized Multi-concept Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2802--2812},
}
Multi-modal Vision Pre-training for Medical Image Analysis: Shaohao Rui,

Lingzhi Chen,

Zhenyu Tang,

Lilong Wang,

Mianxin Liu,

Shaoting Zhang,

Xiaosong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rui_2025_CVPR,
  author    = {Rui, Shaohao and Chen, Lingzhi and Tang, Zhenyu and Wang, Lilong and Liu, Mianxin and Zhang, Shaoting and Wang, Xiaosong},
  title     = {Multi-modal Vision Pre-training for Medical Image Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5164--5174},
}
STEP: Enhancing Video-LLMs' Compositional Reasoning by Spatio-Temporal Graph-guided Self-Training: Haiyi Qiu,

Minghe Gao,

Long Qian,

Kaihang Pan,

Qifan Yu,

Juncheng Li,

Wenjie Wang,

Siliang Tang,

Yueting Zhuang,

Tat-Seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2025_CVPR,
  author    = {Qiu, Haiyi and Gao, Minghe and Qian, Long and Pan, Kaihang and Yu, Qifan and Li, Juncheng and Wang, Wenjie and Tang, Siliang and Zhuang, Yueting and Chua, Tat-Seng},
  title     = {{STEP}: Enhancing {Video-LLMs}' Compositional Reasoning by Spatio-Temporal Graph-guided Self-Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3284--3294},
}
LIM: Large Interpolator Model for Dynamic Reconstruction: Remy Sabathier,

Niloy J. Mitra,

David Novotny; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sabathier_2025_CVPR,
  author    = {Sabathier, Remy and Mitra, Niloy J. and Novotny, David},
  title     = {{LIM}: Large Interpolator Model for Dynamic Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6154--6164},
}
AutoPresent: Designing Structured Visuals from Scratch: Jiaxin Ge,

Zora Zhiruo Wang,

Xuhui Zhou,

Yi-Hao Peng,

Sanjay Subramanian,

Qinyue Tan,

Maarten Sap,

Alane Suhr,

Daniel Fried,

Graham Neubig,

Trevor Darrell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2025_CVPR,
  author    = {Ge, Jiaxin and Wang, Zora Zhiruo and Zhou, Xuhui and Peng, Yi-Hao and Subramanian, Sanjay and Tan, Qinyue and Sap, Maarten and Suhr, Alane and Fried, Daniel and Neubig, Graham and Darrell, Trevor},
  title     = {{AutoPresent}: Designing Structured Visuals from Scratch},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2902--2911},
}
VisionArena: 230k Real World User-VLM Conversations with Preference Labels: Christopher Chou,

Lisa Dunlap,

Koki Mashita,

Krishna Mandal,

Trevor Darrell,

Ion Stoica,

Joseph E. Gonzalez,

Wei-Lin Chiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chou_2025_CVPR,
  author    = {Chou, Christopher and Dunlap, Lisa and Mashita, Koki and Mandal, Krishna and Darrell, Trevor and Stoica, Ion and Gonzalez, Joseph E. and Chiang, Wei-Lin},
  title     = {{VisionArena}: 230k Real World User-{VLM} Conversations with Preference Labels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3877--3887},
}
FAM Diffusion: Frequency and Attention Modulation for High-Resolution Image Generation with Stable Diffusion: Haosen Yang,

Adrian Bulat,

Isma Hadji,

Hai X. Pham,

Xiatian Zhu,

Georgios Tzimiropoulos,

Brais Martinez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Haosen and Bulat, Adrian and Hadji, Isma and Pham, Hai X. and Zhu, Xiatian and Tzimiropoulos, Georgios and Martinez, Brais},
  title     = {{FAM} {Diffusion}: Frequency and Attention Modulation for High-Resolution Image Generation with {Stable} {Diffusion}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2459--2468},
}
MultiGO: Towards Multi-level Geometry Learning for Monocular 3D Textured Human Reconstruction: Gangjian Zhang,

Nanjie Yao,

Shunsi Zhang,

Hanfeng Zhao,

Guoliang Pang,

Jian Shu,

Hao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Gangjian and Yao, Nanjie and Zhang, Shunsi and Zhao, Hanfeng and Pang, Guoliang and Shu, Jian and Wang, Hao},
  title     = {{MultiGO}: Towards Multi-level Geometry Learning for Monocular {3D} Textured Human Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {338--347},
}
Generative Photomontage: Sean J. Liu,

Nupur Kumari,

Ariel Shamir,

Jun-Yan Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Sean J. and Kumari, Nupur and Shamir, Ariel and Zhu, Jun-Yan},
  title     = {Generative Photomontage},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7931--7941},
}
Multi-view Reconstruction via SfM-guided Monocular Depth Estimation: Haoyu Guo,

He Zhu,

Sida Peng,

Haotong Lin,

Yunzhi Yan,

Tao Xie,

Wenguan Wang,

Xiaowei Zhou,

Hujun Bao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Haoyu and Zhu, He and Peng, Sida and Lin, Haotong and Yan, Yunzhi and Xie, Tao and Wang, Wenguan and Zhou, Xiaowei and Bao, Hujun},
  title     = {Multi-view Reconstruction via {SfM}-guided Monocular Depth Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5272--5282},
}
HuMoCon: Concept Discovery for Human Motion Understanding: Qihang Fang,

Chengcheng Tang,

Bugra Tekin,

Shugao Ma,

Yanchao Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Qihang and Tang, Chengcheng and Tekin, Bugra and Ma, Shugao and Yang, Yanchao},
  title     = {{HuMoCon}: Concept Discovery for Human Motion Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7179--7190},
}
FreeScene: Mixed Graph Diffusion for 3D Scene Synthesis from Free Prompts: Tongyuan Bai,

Wangyuanfan Bai,

Dong Chen,

Tieru Wu,

Manyi Li,

Rui Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2025_CVPR,
  author    = {Bai, Tongyuan and Bai, Wangyuanfan and Chen, Dong and Wu, Tieru and Li, Manyi and Ma, Rui},
  title     = {{FreeScene}: Mixed Graph Diffusion for {3D} Scene Synthesis from Free Prompts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5893--5903},
}
Rethinking Correspondence-based Category-Level Object Pose Estimation: Huan Ren,

Wenfei Yang,

Shifeng Zhang,

Tianzhu Zhang; [pdf]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Huan and Yang, Wenfei and Zhang, Shifeng and Zhang, Tianzhu},
  title     = {Rethinking Correspondence-based Category-Level Object Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1170--1179},
}
Curriculum Direct Preference Optimization for Diffusion and Consistency Models: Florinel-Alin Croitoru,

Vlad Hondru,

Radu Tudor Ionescu,

Nicu Sebe,

Mubarak Shah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Croitoru_2025_CVPR,
  author    = {Croitoru, Florinel-Alin and Hondru, Vlad and Ionescu, Radu Tudor and Sebe, Nicu and Shah, Mubarak},
  title     = {Curriculum Direct Preference Optimization for Diffusion and Consistency Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2824--2834},
}
Personalized Preference Fine-tuning of Diffusion Models: Meihua Dang,

Anikait Singh,

Linqi Zhou,

Stefano Ermon,

Jiaming Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dang_2025_CVPR,
  author    = {Dang, Meihua and Singh, Anikait and Zhou, Linqi and Ermon, Stefano and Song, Jiaming},
  title     = {Personalized Preference Fine-tuning of Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8020--8030},
}
NN-Former: Rethinking Graph Structure in Neural Architecture Representation: Ruihan Xu,

Haokui Zhang,

Yaowei Wang,

Wei Zeng,

Shiliang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Ruihan and Zhang, Haokui and Wang, Yaowei and Zeng, Wei and Zhang, Shiliang},
  title     = {{NN-Former}: Rethinking Graph Structure in Neural Architecture Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10004--10014},
}
A Unified Image-Dense Annotation Generation Model for Underwater Scenes: Hongkai Lin,

Dingkang Liang,

Zhenghao Qi,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Hongkai and Liang, Dingkang and Qi, Zhenghao and Bai, Xiang},
  title     = {A Unified Image-Dense Annotation Generation Model for Underwater Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {961--970},
}
NTR-Gaussian: Nighttime Dynamic Thermal Reconstruction with 4D Gaussian Splatting Based on Thermodynamics: Kun Yang,

Yuxiang Liu,

Zeyu Cui,

Yu Liu,

Maojun Zhang,

Shen Yan,

Qing Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Kun and Liu, Yuxiang and Cui, Zeyu and Liu, Yu and Zhang, Maojun and Yan, Shen and Wang, Qing},
  title     = {{NTR-Gaussian}: Nighttime Dynamic Thermal Reconstruction with {4D} {Gaussian} Splatting Based on Thermodynamics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {691--700},
}
FSHNet: Fully Sparse Hybrid Network for 3D Object Detection: Shuai Liu,

Mingyue Cui,

Boyang Li,

Quanmin Liang,

Tinghe Hong,

Kai Huang,

Yunxiao Shan,

Kai Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Shuai and Cui, Mingyue and Li, Boyang and Liang, Quanmin and Hong, Tinghe and Huang, Kai and Shan, Yunxiao and Huang, Kai},
  title     = {{FSHNet}: Fully Sparse Hybrid Network for {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8900--8909},
}
JTD-UAV: MLLM-Enhanced Joint Tracking and Description Framework for Anti-UAV Systems: Yifan Wang,

Jian Zhao,

Zhaoxin Fan,

Xin Zhang,

Xuecheng Wu,

Yudian Zhang,

Lei Jin,

Xinyue Li,

Gang Wang,

Mengxi Jia,

Ping Hu,

Zheng Zhu,

Xuelong Li; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yifan and Zhao, Jian and Fan, Zhaoxin and Zhang, Xin and Wu, Xuecheng and Zhang, Yudian and Jin, Lei and Li, Xinyue and Wang, Gang and Jia, Mengxi and Hu, Ping and Zhu, Zheng and Li, Xuelong},
  title     = {{JTD-UAV}: {MLLM}-Enhanced Joint Tracking and Description Framework for Anti-{UAV} Systems},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1633--1644},
}
HaWoR: World-Space Hand Motion Reconstruction from Egocentric Videos: Jinglei Zhang,

Jiankang Deng,

Chao Ma,

Rolandos Alexandros Potamias; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jinglei and Deng, Jiankang and Ma, Chao and Potamias, Rolandos Alexandros},
  title     = {{HaWoR}: World-Space Hand Motion Reconstruction from Egocentric Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1805--1815},
}
High Temporal Consistency through Semantic Similarity Propagation in Semi-Supervised Video Semantic Segmentation for Autonomous Flight: Cédric Vincent,

Taehyoung Kim,

Henri Meeß; [pdf] [supp]
[bibtex]
@InProceedings{Vincent_2025_CVPR,
  author    = {Vincent, C{\'e}dric and Kim, Taehyoung and Mee{\ss}, Henri},
  title     = {High Temporal Consistency through Semantic Similarity Propagation in Semi-Supervised Video Semantic Segmentation for Autonomous Flight},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1461--1471}
}
Generative Gaussian Splatting for Unbounded 3D City Generation: Haozhe Xie,

Zhaoxi Chen,

Fangzhou Hong,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Haozhe and Chen, Zhaoxi and Hong, Fangzhou and Liu, Ziwei},
  title     = {Generative {Gaussian} Splatting for Unbounded {3D} City Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6111--6120}
}
GeoMM: On Geodesic Perspective for Multi-modal Learning: Shibin Mei,

Hang Wang,

Bingbing Ni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mei_2025_CVPR,
  author    = {Mei, Shibin and Wang, Hang and Ni, Bingbing},
  title     = {{GeoMM}: On Geodesic Perspective for Multi-modal Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4776--4786}
}
VISCO: Benchmarking Fine-Grained Critique and Correction Towards Self-Improvement in Visual Reasoning: Xueqing Wu,

Yuheng Ding,

Bingxuan Li,

Pan Lu,

Da Yin,

Kai-Wei Chang,

Nanyun Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Xueqing and Ding, Yuheng and Li, Bingxuan and Lu, Pan and Yin, Da and Chang, Kai-Wei and Peng, Nanyun},
  title     = {{VISCO}: Benchmarking Fine-Grained Critique and Correction Towards Self-Improvement in Visual Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9527--9537}
}
Adaptive Dropout: Unleashing Dropout across Layers for Generalizable Image Super-Resolution: Hang Xu,

Jie Huang,

Wei Yu,

Jiangtong Tan,

Zhen Zou,

Feng Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Hang and Huang, Jie and Yu, Wei and Tan, Jiangtong and Zou, Zhen and Zhao, Feng},
  title     = {Adaptive Dropout: Unleashing Dropout across Layers for Generalizable Image Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7513--7523}
}
Breaking the Memory Barrier of Contrastive Loss via Tile-Based Strategy: Zesen Cheng,

Hang Zhang,

Kehan Li,

Sicong Leng,

Zhiqiang Hu,

Fei Wu,

Deli Zhao,

Xin Li,

Lidong Bing; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Zesen and Zhang, Hang and Li, Kehan and Leng, Sicong and Hu, Zhiqiang and Wu, Fei and Zhao, Deli and Li, Xin and Bing, Lidong},
  title     = {Breaking the Memory Barrier of Contrastive Loss via Tile-Based Strategy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10036--10045}
}
Learning Phase Distortion with Selective State Space Models for Video Turbulence Mitigation: Xingguang Zhang,

Nicholas Chimitt,

Xijun Wang,

Yu Yuan,

Stanley H. Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xingguang and Chimitt, Nicholas and Wang, Xijun and Yuan, Yu and Chan, Stanley H.},
  title     = {Learning Phase Distortion with Selective State Space Models for Video Turbulence Mitigation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2127--2138}
}
RoboPEPP: Vision-Based Robot Pose and Joint Angle Estimation through Embedding Predictive Pre-Training: Raktim Gautam Goswami,

Prashanth Krishnamurthy,

Yann LeCun,

Farshad Khorrami; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Goswami_2025_CVPR,
  author    = {Goswami, Raktim Gautam and Krishnamurthy, Prashanth and LeCun, Yann and Khorrami, Farshad},
  title     = {{RoboPEPP}: Vision-Based Robot Pose and Joint Angle Estimation through Embedding Predictive Pre-Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6930--6939}
}
Distraction is All You Need for Multimodal Large Language Model Jailbreaking: Zuopeng Yang,

Jiluan Fan,

Anli Yan,

Erdun Gao,

Xin Lin,

Tao Li,

Kanghua Mo,

Changyu Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Zuopeng and Fan, Jiluan and Yan, Anli and Gao, Erdun and Lin, Xin and Li, Tao and Mo, Kanghua and Dong, Changyu},
  title     = {Distraction is All You Need for Multimodal Large Language Model Jailbreaking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9467--9476}
}
Learning to Normalize on the SPD Manifold under Bures-Wasserstein Geometry: Rui Wang,

Shaocheng Jin,

Ziheng Chen,

Xiaoqing Luo,

Xiao-Jun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Rui and Jin, Shaocheng and Chen, Ziheng and Luo, Xiaoqing and Wu, Xiao-Jun},
  title     = {Learning to Normalize on the {SPD} Manifold under {Bures-Wasserstein} Geometry},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8289--8298}
}
SAMWISE: Infusing Wisdom in SAM2 for Text-Driven Video Segmentation: Claudia Cuttano,

Gabriele Trivigno,

Gabriele Rosi,

Carlo Masone,

Giuseppe Averta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cuttano_2025_CVPR,
  author    = {Cuttano, Claudia and Trivigno, Gabriele and Rosi, Gabriele and Masone, Carlo and Averta, Giuseppe},
  title     = {{SAMWISE}: Infusing Wisdom in {SAM2} for Text-Driven Video Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3395--3405}
}
BEVDiffuser: Plug-and-Play Diffusion Model for BEV Denoising with Ground-Truth Guidance: Xin Ye,

Burhaneddin Yaman,

Sheng Cheng,

Feng Tao,

Abhirup Mallik,

Liu Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Xin and Yaman, Burhaneddin and Cheng, Sheng and Tao, Feng and Mallik, Abhirup and Ren, Liu},
  title     = {{BEVDiffuser}: Plug-and-Play Diffusion Model for {BEV} Denoising with Ground-Truth Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1495--1504}
}
FinePhys: Fine-grained Human Action Generation by Explicitly Incorporating Physical Laws for Effective Skeletal Guidance: Dian Shao,

Mingfei Shi,

Shengda Xu,

Haodong Chen,

Yongle Huang,

Binglu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2025_CVPR,
  author    = {Shao, Dian and Shi, Mingfei and Xu, Shengda and Chen, Haodong and Huang, Yongle and Wang, Binglu},
  title     = {{FinePhys}: Fine-grained Human Action Generation by Explicitly Incorporating Physical Laws for Effective Skeletal Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1905--1916}
}
SeedVR: Seeding Infinity in Diffusion Transformer Towards Generic Video Restoration: Jianyi Wang,

Zhijie Lin,

Meng Wei,

Yang Zhao,

Ceyuan Yang,

Chen Change Loy,

Lu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jianyi and Lin, Zhijie and Wei, Meng and Zhao, Yang and Yang, Ceyuan and Loy, Chen Change and Jiang, Lu},
  title     = {{SeedVR}: Seeding Infinity in Diffusion Transformer Towards Generic Video Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2161--2172}
}
Beyond Generation: A Diffusion-based Low-level Feature Extractor for Detecting AI-generated Images: Nan Zhong,

Haoyu Chen,

Yiran Xu,

Zhenxing Qian,

Xinpeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhong_2025_CVPR,
  author    = {Zhong, Nan and Chen, Haoyu and Xu, Yiran and Qian, Zhenxing and Zhang, Xinpeng},
  title     = {Beyond Generation: A Diffusion-based Low-level Feature Extractor for Detecting {AI}-generated Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8258--8268}
}
Optical-Flow Guided Prompt Optimization for Coherent Video Generation: Hyelin Nam,

Jaemin Kim,

Dohun Lee,

Jong Chul Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2025_CVPR,
  author    = {Nam, Hyelin and Kim, Jaemin and Lee, Dohun and Ye, Jong Chul},
  title     = {Optical-Flow Guided Prompt Optimization for Coherent Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7837--7846}
}
Code-as-Monitor: Constraint-aware Visual Programming for Reactive and Proactive Robotic Failure Detection: Enshen Zhou,

Qi Su,

Cheng Chi,

Zhizheng Zhang,

Zhongyuan Wang,

Tiejun Huang,

Lu Sheng,

He Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Enshen and Su, Qi and Chi, Cheng and Zhang, Zhizheng and Wang, Zhongyuan and Huang, Tiejun and Sheng, Lu and Wang, He},
  title     = {{Code-as-Monitor}: Constraint-aware Visual Programming for Reactive and Proactive Robotic Failure Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6919--6929}
}
Rethinking Reconstruction and Denoising in the Dark: New Perspective, General Architecture and Beyond: Tengyu Ma,

Long Ma,

Ziye Li,

Yuetong Wang,

Jinyuan Liu,

Chengpei Xu,

Risheng Liu; [pdf]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Tengyu and Ma, Long and Li, Ziye and Wang, Yuetong and Liu, Jinyuan and Xu, Chengpei and Liu, Risheng},
  title     = {Rethinking Reconstruction and Denoising in the Dark: New Perspective, General Architecture and Beyond},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2323--2332}
}
Federated Learning with Domain Shift Eraser: Zheng Wang,

Zihui Wang,

Zheng Wang,

Xiaoliang Fan,

Cheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zheng and Wang, Zihui and Wang, Zheng and Fan, Xiaoliang and Wang, Cheng},
  title     = {Federated Learning with Domain Shift Eraser},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4978--4987}
}
DiTCtrl: Exploring Attention Control in Multi-Modal Diffusion Transformer for Tuning-Free Multi-Prompt Longer Video Generation: Minghong Cai,

Xiaodong Cun,

Xiaoyu Li,

Wenze Liu,

Zhaoyang Zhang,

Yong Zhang,

Ying Shan,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Minghong and Cun, Xiaodong and Li, Xiaoyu and Liu, Wenze and Zhang, Zhaoyang and Zhang, Yong and Shan, Ying and Yue, Xiangyu},
  title     = {{DiTCtrl}: Exploring Attention Control in Multi-Modal Diffusion Transformer for Tuning-Free Multi-Prompt Longer Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7763--7772}
}
Link to the Past: Temporal Propagation for Fast 3D Human Reconstruction from Monocular Video: Matthew Marchellus,

Nadhira Noor,

In Kyu Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Marchellus_2025_CVPR,
  author    = {Marchellus, Matthew and Noor, Nadhira and Park, In Kyu},
  title     = {Link to the Past: Temporal Propagation for Fast {3D} Human Reconstruction from Monocular Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6190--6199}
}
Deterministic Certification of Graph Neural Networks against Graph Poisoning Attacks with Arbitrary Perturbations: Jiate Li,

Meng Pang,

Yun Dong,

Binghui Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jiate and Pang, Meng and Dong, Yun and Wang, Binghui},
  title     = {Deterministic Certification of Graph Neural Networks against Graph Poisoning Attacks with Arbitrary Perturbations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5020--5029}
}
A3: Few-shot Prompt Learning of Unlearnable Examples with Cross-Modal Adversarial Feature Alignment: Xuan Wang,

Xitong Gao,

Dongping Liao,

Tianrui Qin,

Yu-liang Lu,

Cheng-zhong Xu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xuan and Gao, Xitong and Liao, Dongping and Qin, Tianrui and Lu, Yu-liang and Xu, Cheng-zhong},
  title     = {{A3}: Few-shot Prompt Learning of Unlearnable Examples with Cross-Modal Adversarial Feature Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9507--9516}
}
MVPaint: Synchronized Multi-View Diffusion for Painting Anything 3D: Wei Cheng,

Juncheng Mu,

Xianfang Zeng,

Xin Chen,

Anqi Pang,

Chi Zhang,

Zhibin Wang,

Bin Fu,

Gang Yu,

Ziwei Liu,

Liang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Wei and Mu, Juncheng and Zeng, Xianfang and Chen, Xin and Pang, Anqi and Zhang, Chi and Wang, Zhibin and Fu, Bin and Yu, Gang and Liu, Ziwei and Pan, Liang},
  title     = {{MVPaint}: Synchronized Multi-View Diffusion for Painting Anything {3D}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {585--594}
}
ASAP: Advancing Semantic Alignment Promotes Multi-Modal Manipulation Detecting and Grounding: Zhenxing Zhang,

Yaxiong Wang,

Lechao Cheng,

Zhun Zhong,

Dan Guo,

Meng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhenxing and Wang, Yaxiong and Cheng, Lechao and Zhong, Zhun and Guo, Dan and Wang, Meng},
  title     = {{ASAP}: Advancing Semantic Alignment Promotes Multi-Modal Manipulation Detecting and Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4005--4014}
}
MotiF: Making Text Count in Image Animation with Motion Focal Loss: Shijie Wang,

Samaneh Azadi,

Rohit Girdhar,

Saketh Rambhatla,

Chen Sun,

Xi Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shijie and Azadi, Samaneh and Girdhar, Rohit and Rambhatla, Saketh and Sun, Chen and Yin, Xi},
  title     = {{MotiF}: Making Text Count in Image Animation with Motion Focal Loss},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7773--7783}
}
Towards Explicit Geometry-Reflectance Collaboration for Generalized LiDAR Segmentation in Adverse Weather: Longyu Yang,

Ping Hu,

Shangbo Yuan,

Lu Zhang,

Jun Liu,

Hengtao Shen,

Xiaofeng Zhu; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Longyu and Hu, Ping and Yuan, Shangbo and Zhang, Lu and Liu, Jun and Shen, Hengtao and Zhu, Xiaofeng},
  title     = {Towards Explicit Geometry-Reflectance Collaboration for Generalized {LiDAR} Segmentation in Adverse Weather},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {139--149}
}
MaskGaussian: Adaptive 3D Gaussian Representation from Probabilistic Masks: Yifei Liu,

Zhihang Zhong,

Yifan Zhan,

Sheng Xu,

Xiao Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yifei and Zhong, Zhihang and Zhan, Yifan and Xu, Sheng and Sun, Xiao},
  title     = {{MaskGaussian}: Adaptive {3D} {Gaussian} Representation from Probabilistic Masks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {681--690}
}
SoundVista: Novel-View Ambient Sound Synthesis via Visual-Acoustic Binding: Mingfei Chen,

Israel D. Gebru,

Ishwarya Ananthabhotla,

Christian Richardt,

Dejan Markovic,

Jake Sandakly,

Steven Krenn,

Todd Keebler,

Eli Shlizerman,

Alexander Richard; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Mingfei and Gebru, Israel D. and Ananthabhotla, Ishwarya and Richardt, Christian and Markovic, Dejan and Sandakly, Jake and Krenn, Steven and Keebler, Todd and Shlizerman, Eli and Richard, Alexander},
  title     = {{SoundVista}: Novel-View Ambient Sound Synthesis via Visual-Acoustic Binding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8331--8341}
}
CLIP Under the Microscope: A Fine-Grained Analysis of Multi-Object Representation: Reza Abbasi,

Ali Nazari,

Aminreza Sefid,

Mohammadali Banayeeanzade,

Mohammad Hossein Rohban,

Mahdieh Soleymani Baghshah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Abbasi_2025_CVPR,
  author    = {Abbasi, Reza and Nazari, Ali and Sefid, Aminreza and Banayeeanzade, Mohammadali and Rohban, Mohammad Hossein and Baghshah, Mahdieh Soleymani},
  title     = {{CLIP} Under the Microscope: A Fine-Grained Analysis of Multi-Object Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9308--9317}
}
Navigating Image Restoration with VAR's Distribution Alignment Prior: Siyang Wang,

Naishan Zheng,

Jie Huang,

Feng Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Siyang and Zheng, Naishan and Huang, Jie and Zhao, Feng},
  title     = {Navigating Image Restoration with {VAR}'s Distribution Alignment Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7559--7569}
}
Dissecting and Mitigating Diffusion Bias via Mechanistic Interpretability: Yingdong Shi,

Changming Li,

Yifan Wang,

Yongxiang Zhao,

Anqi Pang,

Sibei Yang,

Jingyi Yu,

Kan Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Yingdong and Li, Changming and Wang, Yifan and Zhao, Yongxiang and Pang, Anqi and Yang, Sibei and Yu, Jingyi and Ren, Kan},
  title     = {Dissecting and Mitigating Diffusion Bias via Mechanistic Interpretability},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8192--8202}
}
Graph Neural Network Combining Event Stream and Periodic Aggregation for Low-Latency Event-based Vision: Manon Dampfhoffer,

Thomas Mesquida,

Damien Joubert,

Thomas Dalgaty,

Pascal Vivet,

Christoph Posch; [pdf]
[bibtex]
@InProceedings{Dampfhoffer_2025_CVPR,
  author    = {Dampfhoffer, Manon and Mesquida, Thomas and Joubert, Damien and Dalgaty, Thomas and Vivet, Pascal and Posch, Christoph},
  title     = {Graph Neural Network Combining Event Stream and Periodic Aggregation for Low-Latency Event-based Vision},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6909--6918}
}
ArtFormer: Controllable Generation of Diverse 3D Articulated Objects: Jiayi Su,

Youhe Feng,

Zheng Li,

Jinhua Song,

Yangfan He,

Botao Ren,

Botian Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2025_CVPR,
  author    = {Su, Jiayi and Feng, Youhe and Li, Zheng and Song, Jinhua and He, Yangfan and Ren, Botao and Xu, Botian},
  title     = {{ArtFormer}: Controllable Generation of Diverse {3D} Articulated Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1894--1904}
}
Bridging Gait Recognition and Large Language Models Sequence Modeling: Shaopeng Yang,

Jilong Wang,

Saihui Hou,

Xu Liu,

Chunshui Cao,

Liang Wang,

Yongzhen Huang; [pdf]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Shaopeng and Wang, Jilong and Hou, Saihui and Liu, Xu and Cao, Chunshui and Wang, Liang and Huang, Yongzhen},
  title     = {Bridging Gait Recognition and Large Language Models Sequence Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3460--3469}
}
DiSRT-In-Bed: Diffusion-Based Sim-to-Real Transfer Framework for In-Bed Human Mesh Recovery: Jing Gao,

Ce Zheng,

Laszlo A. Jeni,

Zackory Erickson; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Jing and Zheng, Ce and Jeni, Laszlo A. and Erickson, Zackory},
  title     = {{DiSRT-In-Bed}: Diffusion-Based Sim-to-Real Transfer Framework for In-Bed Human Mesh Recovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1829--1838}
}
COUNTS: Benchmarking Object Detectors and Multimodal Large Language Models under Distribution Shifts: Jiansheng Li,

Xingxuan Zhang,

Hao Zou,

Yige Guo,

Renzhe Xu,

Yilong Liu,

Chuzhao Zhu,

Yue He,

Peng Cui; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jiansheng and Zhang, Xingxuan and Zou, Hao and Guo, Yige and Xu, Renzhe and Liu, Yilong and Zhu, Chuzhao and He, Yue and Cui, Peng},
  title     = {{COUNTS}: Benchmarking Object Detectors and Multimodal Large Language Models under Distribution Shifts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9186--9198}
}
HOT: Hadamard-based Optimized Training: Seonggon Kim,

Juncheol Shin,

Seung-taek Woo,

Eunhyeok Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Seonggon and Shin, Juncheol and Woo, Seung-taek and Park, Eunhyeok},
  title     = {{HOT}: {Hadamard}-based Optimized Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4787--4796}
}
TokenFlow: Unified Image Tokenizer for Multimodal Understanding and Generation: Liao Qu,

Huichao Zhang,

Yiheng Liu,

Xu Wang,

Yi Jiang,

Yiming Gao,

Hu Ye,

Daniel K. Du,

Zehuan Yuan,

Xinglong Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_CVPR,
  author    = {Qu, Liao and Zhang, Huichao and Liu, Yiheng and Wang, Xu and Jiang, Yi and Gao, Yiming and Ye, Hu and Du, Daniel K. and Yuan, Zehuan and Wu, Xinglong},
  title     = {{TokenFlow}: Unified Image Tokenizer for Multimodal Understanding and Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2545--2555}
}
SnapGen-V: Generating a Five-Second Video within Five Seconds on a Mobile Device: Yushu Wu,

Zhixing Zhang,

Yanyu Li,

Yanwu Xu,

Anil Kag,

Yang Sui,

Huseyin Coskun,

Ke Ma,

Aleksei Lebedev,

Ju Hu,

Dimitris N. Metaxas,

Yanzhi Wang,

Sergey Tulyakov,

Jian Ren; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Yushu and Zhang, Zhixing and Li, Yanyu and Xu, Yanwu and Kag, Anil and Sui, Yang and Coskun, Huseyin and Ma, Ke and Lebedev, Aleksei and Hu, Ju and Metaxas, Dimitris N. and Wang, Yanzhi and Tulyakov, Sergey and Ren, Jian},
  title     = {{SnapGen-V}: Generating a Five-Second Video within Five Seconds on a Mobile Device},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2479--2490}
}
Adapting Dense Matching for Homography Estimation with Grid-based Acceleration: Kaining Zhang,

Yuxin Deng,

Jiayi Ma,

Paolo Favaro; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Kaining and Deng, Yuxin and Ma, Jiayi and Favaro, Paolo},
  title     = {Adapting Dense Matching for Homography Estimation with Grid-based Acceleration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6294--6303}
}
CoSDH: Communication-Efficient Collaborative Perception via Supply-Demand Awareness and Intermediate-Late Hybridization: Junhao Xu,

Yanan Zhang,

Zhi Cai,

Di Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Junhao and Zhang, Yanan and Cai, Zhi and Huang, Di},
  title     = {{CoSDH}: Communication-Efficient Collaborative Perception via Supply-Demand Awareness and Intermediate-Late Hybridization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6834--6843}
}
Stereo Anywhere: Robust Zero-Shot Deep Stereo Matching Even Where Either Stereo or Mono Fail: Luca Bartolomei,

Fabio Tosi,

Matteo Poggi,

Stefano Mattoccia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bartolomei_2025_CVPR,
  author    = {Bartolomei, Luca and Tosi, Fabio and Poggi, Matteo and Mattoccia, Stefano},
  title     = {Stereo Anywhere: Robust Zero-Shot Deep Stereo Matching Even Where Either Stereo or Mono Fail},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1013--1027}
}
Order-Robust Class Incremental Learning: Graph-Driven Dynamic Similarity Grouping: Guannan Lai,

Yujie Li,

Xiangkun Wang,

Junbo Zhang,

Tianrui Li,

Xin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Guannan and Li, Yujie and Wang, Xiangkun and Zhang, Junbo and Li, Tianrui and Yang, Xin},
  title     = {Order-Robust Class Incremental Learning: Graph-Driven Dynamic Similarity Grouping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4894--4904}
}
Where the Devil Hides: Deepfake Detectors Can No Longer Be Trusted: Shuaiwei Yuan,

Junyu Dong,

Yuezun Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Shuaiwei and Dong, Junyu and Li, Yuezun},
  title     = {Where the Devil Hides: Deepfake Detectors Can No Longer Be Trusted},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8764--8774}
}
CaMuViD: Calibration-Free Multi-View Detection: Amir Etefaghi Daryani,

M. Usman Maqbool Bhutta,

Byron Hernandez,

Henry Medeiros; [pdf]
[bibtex]
@InProceedings{Daryani_2025_CVPR,
  author    = {Daryani, Amir Etefaghi and Bhutta, M. Usman Maqbool and Hernandez, Byron and Medeiros, Henry},
  title     = {{CaMuViD}: Calibration-Free Multi-View Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1220--1229}
}
Prosody-Enhanced Acoustic Pre-training and Acoustic-Disentangled Prosody Adapting for Movie Dubbing: Zhedong Zhang,

Liang Li,

Chenggang Yan,

Chunshan Liu,

Anton van den Hengel,

Yuankai Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhedong and Li, Liang and Yan, Chenggang and Liu, Chunshan and van den Hengel, Anton and Qi, Yuankai},
  title     = {Prosody-Enhanced Acoustic Pre-training and Acoustic-Disentangled Prosody Adapting for Movie Dubbing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {172--182}
}
HVI: A New Color Space for Low-light Image Enhancement: Qingsen Yan,

Yixu Feng,

Cheng Zhang,

Guansong Pang,

Kangbiao Shi,

Peng Wu,

Wei Dong,

Jinqiu Sun,

Yanning Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Qingsen and Feng, Yixu and Zhang, Cheng and Pang, Guansong and Shi, Kangbiao and Wu, Peng and Dong, Wei and Sun, Jinqiu and Zhang, Yanning},
  title     = {{HVI}: A New Color Space for Low-light Image Enhancement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5678--5687}
}
DualPM: Dual Posed-Canonical Point Maps for 3D Shape and Pose Reconstruction: Ben Kaye,

Tomas Jakab,

Shangzhe Wu,

Christian Rupprecht,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kaye_2025_CVPR,
  author    = {Kaye, Ben and Jakab, Tomas and Wu, Shangzhe and Rupprecht, Christian and Vedaldi, Andrea},
  title     = {{DualPM}: Dual Posed-Canonical Point Maps for {3D} Shape and Pose Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6425--6435}
}
One Diffusion to Generate Them All: Duong H. Le,

Tuan Pham,

Sangho Lee,

Christopher Clark,

Aniruddha Kembhavi,

Stephan Mandt,

Ranjay Krishna,

Jiasen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Le_2025_CVPR,
  author    = {Le, Duong H. and Pham, Tuan and Lee, Sangho and Clark, Christopher and Kembhavi, Aniruddha and Mandt, Stephan and Krishna, Ranjay and Lu, Jiasen},
  title     = {One Diffusion to Generate Them All},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2671--2682}
}
CoSER: Towards Consistent Dense Multiview Text-to-Image Generator for 3D Creation: Bonan Li,

Zicheng Zhang,

Xingyi Yang,

Xinchao Wang; [pdf]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Bonan and Zhang, Zicheng and Yang, Xingyi and Wang, Xinchao},
  title     = {{CoSER}: Towards Consistent Dense Multiview Text-to-Image Generator for {3D} Creation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2880--2890}
}
UNEM: UNrolled Generalized EM for Transductive Few-Shot Learning: Long Zhou,

Fereshteh Shakeri,

Aymen Sadraoui,

Mounir Kaaniche,

Jean-Christophe Pesquet,

Ismail Ben Ayed; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Long and Shakeri, Fereshteh and Sadraoui, Aymen and Kaaniche, Mounir and Pesquet, Jean-Christophe and Ben Ayed, Ismail},
  title     = {{UNEM}: {UNrolled} Generalized {EM} for Transductive Few-Shot Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9665--9675}
}
G3Flow: Generative 3D Semantic Flow for Pose-aware and Generalizable Object Manipulation: Tianxing Chen,

Yao Mu,

Zhixuan Liang,

Zanxin Chen,

Shijia Peng,

Qiangyu Chen,

Mingkun Xu,

Ruizhen Hu,

Hongyuan Zhang,

Xuelong Li,

Ping Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Tianxing and Mu, Yao and Liang, Zhixuan and Chen, Zanxin and Peng, Shijia and Chen, Qiangyu and Xu, Mingkun and Hu, Ruizhen and Zhang, Hongyuan and Li, Xuelong and Luo, Ping},
  title     = {{G3Flow}: Generative {3D} Semantic Flow for Pose-aware and Generalizable Object Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1735--1744}
}
Explaining Domain Shifts in Language: Concept Erasing for Interpretable Image Classification: Zequn Zeng,

Yudi Su,

Jianqiao Sun,

Tiansheng Wen,

Hao Zhang,

Zhengjue Wang,

Bo Chen,

Hongwei Liu,

Jiawei Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_CVPR,
  author    = {Zeng, Zequn and Su, Yudi and Sun, Jianqiao and Wen, Tiansheng and Zhang, Hao and Wang, Zhengjue and Chen, Bo and Liu, Hongwei and Ma, Jiawei},
  title     = {Explaining Domain Shifts in Language: Concept Erasing for Interpretable Image Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9517--9526}
}
Textured Gaussians for Enhanced 3D Scene Appearance Modeling: Brian Chao,

Hung-Yu Tseng,

Lorenzo Porzi,

Chen Gao,

Tuotuo Li,

Qinbo Li,

Ayush Saraf,

Jia-Bin Huang,

Johannes Kopf,

Gordon Wetzstein,

Changil Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chao_2025_CVPR,
  author    = {Chao, Brian and Tseng, Hung-Yu and Porzi, Lorenzo and Gao, Chen and Li, Tuotuo and Li, Qinbo and Saraf, Ayush and Huang, Jia-Bin and Kopf, Johannes and Wetzstein, Gordon and Kim, Changil},
  title     = {Textured {Gaussians} for Enhanced {3D} Scene Appearance Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8964--8974},
}
NeighborRetr: Balancing Hub Centrality in Cross-Modal Retrieval: Zengrong Lin,

Zheng Wang,

Tianwen Qian,

Pan Mu,

Sixian Chan,

Cong Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Zengrong and Wang, Zheng and Qian, Tianwen and Mu, Pan and Chan, Sixian and Bai, Cong},
  title     = {{NeighborRetr}: Balancing Hub Centrality in Cross-Modal Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9263--9273},
}
Global-Local Tree Search in VLMs for 3D Indoor Scene Generation: Wei Deng,

Mengshi Qi,

Huadong Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Wei and Qi, Mengshi and Ma, Huadong},
  title     = {Global-Local Tree Search in {VLMs} for {3D} Indoor Scene Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8975--8984},
}
GFlowVLM: Enhancing Multi-step Reasoning in Vision-Language Models with Generative Flow Networks: Haoqiang Kang,

Enna Sachdeva,

Piyush Gupta,

Sangjae Bae,

Kwonjoon Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_CVPR,
  author    = {Kang, Haoqiang and Sachdeva, Enna and Gupta, Piyush and Bae, Sangjae and Lee, Kwonjoon},
  title     = {{GFlowVLM}: Enhancing Multi-step Reasoning in Vision-Language Models with Generative Flow Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3815--3825},
}
MAP: Unleashing Hybrid Mamba-Transformer Vision Backbone's Potential with Masked Autoregressive Pretraining: Yunze Liu,

Li Yi; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Liu_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yunze and Yi, Li},
  title     = {{MAP}: Unleashing Hybrid {Mamba-Transformer} Vision Backbone's Potential with Masked Autoregressive Pretraining},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9676--9685},
}
Segment Any-Quality Images with Generative Latent Space Enhancement: Guangqian Guo,

Yong Guo,

Xuehui Yu,

Wenbo Li,

Yaoxing Wang,

Shan Gao; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Guangqian and Guo, Yong and Yu, Xuehui and Li, Wenbo and Wang, Yaoxing and Gao, Shan},
  title     = {Segment Any-Quality Images with Generative Latent Space Enhancement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2366--2376},
}
CityWalker: Learning Embodied Urban Navigation from Web-Scale Videos: Xinhao Liu,

Jintong Li,

Yicheng Jiang,

Niranjan Sujay,

Zhicheng Yang,

Juexiao Zhang,

John Abanes,

Jing Zhang,

Chen Feng; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Liu_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xinhao and Li, Jintong and Jiang, Yicheng and Sujay, Niranjan and Yang, Zhicheng and Zhang, Juexiao and Abanes, John and Zhang, Jing and Feng, Chen},
  title     = {{CityWalker}: Learning Embodied Urban Navigation from Web-Scale Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6875--6885},
}
Learning Visual Composition through Improved Semantic Guidance: Austin Stone,

Hagen Soltau,

Robert Geirhos,

Xi Yi,

Ye Xia,

Bingyi Cao,

Kaifeng Chen,

Abhijit Ogale,

Jonathon Shlens; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Stone_2025_CVPR,
  author    = {Stone, Austin and Soltau, Hagen and Geirhos, Robert and Yi, Xi and Xia, Ye and Cao, Bingyi and Chen, Kaifeng and Ogale, Abhijit and Shlens, Jonathon},
  title     = {Learning Visual Composition through Improved Semantic Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3740--3750},
}
JanusFlow: Harmonizing Autoregression and Rectified Flow for Unified Multimodal Understanding and Generation: Yiyang Ma,

Xingchao Liu,

Xiaokang Chen,

Wen Liu,

Chengyue Wu,

Zhiyu Wu,

Zizheng Pan,

Zhenda Xie,

Haowei Zhang,

Xingkai Yu,

Liang Zhao,

Yisong Wang,

Jiaying Liu,

Chong Ruan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Yiyang and Liu, Xingchao and Chen, Xiaokang and Liu, Wen and Wu, Chengyue and Wu, Zhiyu and Pan, Zizheng and Xie, Zhenda and Zhang, Haowei and Yu, Xingkai and Zhao, Liang and Wang, Yisong and Liu, Jiaying and Ruan, Chong},
  title     = {{JanusFlow}: Harmonizing Autoregression and Rectified Flow for Unified Multimodal Understanding and Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7739--7751},
}
Visual Prompting for One-shot Controllable Video Editing without Inversion: Zhengbo Zhang,

Yuxi Zhou,

Duo Peng,

Joo-Hwee Lim,

Zhigang Tu,

De Wen Soh,

Lin Geng Foo; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhang_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhengbo and Zhou, Yuxi and Peng, Duo and Lim, Joo-Hwee and Tu, Zhigang and Soh, De Wen and Foo, Lin Geng},
  title     = {Visual Prompting for One-shot Controllable Video Editing without Inversion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7784--7794},
}
AVQACL: A Novel Benchmark for Audio-Visual Question Answering Continual Learning: Kaixuan Wu,

Xinde Li,

Xinling Li,

Chuanfei Hu,

Guoliang Wu; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Wu_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Kaixuan and Li, Xinde and Li, Xinling and Hu, Chuanfei and Wu, Guoliang},
  title     = {{AVQACL}: A Novel Benchmark for Audio-Visual Question Answering Continual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3252--3261},
}
Flash-Split: 2D Reflection Removal with Flash Cues and Latent Diffusion Separation: Tianfu Wang,

Mingyang Xie,

Haoming Cai,

Sachin Shah,

Christopher A. Metzler; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Wang_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Tianfu and Xie, Mingyang and Cai, Haoming and Shah, Sachin and Metzler, Christopher A.},
  title     = {{Flash-Split}: {2D} Reflection Removal with Flash Cues and Latent Diffusion Separation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5688--5698},
}
Attention IoU: Examining Biases in CelebA using Attention Maps: Aaron Serianni,

Tyler Zhu,

Olga Russakovsky,

Vikram V. Ramaswamy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Serianni_2025_CVPR,
  author    = {Serianni, Aaron and Zhu, Tyler and Russakovsky, Olga and Ramaswamy, Vikram V.},
  title     = {Attention {IoU}: Examining Biases in {CelebA} using Attention Maps},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4386--4397},
}
HEIE: MLLM-Based Hierarchical Explainable AIGC Image Implausibility Evaluator: Fan Yang,

Ru Zhen,

Jianing Wang,

Yanhao Zhang,

Haoxiang Chen,

Haonan Lu,

Sicheng Zhao,

Guiguang Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Fan and Zhen, Ru and Wang, Jianing and Zhang, Yanhao and Chen, Haoxiang and Lu, Haonan and Zhao, Sicheng and Ding, Guiguang},
  title     = {{HEIE}: {MLLM}-Based Hierarchical Explainable {AIGC} Image Implausibility Evaluator},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3856--3866},
}
Segment Any Motion in Videos: Nan Huang,

Wenzhao Zheng,

Chenfeng Xu,

Kurt Keutzer,

Shanghang Zhang,

Angjoo Kanazawa,

Qianqian Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Huang_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Nan and Zheng, Wenzhao and Xu, Chenfeng and Keutzer, Kurt and Zhang, Shanghang and Kanazawa, Angjoo and Wang, Qianqian},
  title     = {Segment Any Motion in Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3406--3416},
}
Task-aware Cross-modal Feature Refinement Transformer with Large Language Models for Visual Grounding: Wenbo Chen,

Zhen Xu,

Ruotao Xu,

Si Wu,

Hau-San Wong; [pdf]
[bibtex]
% NOTE(review): citation key Chen_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Wenbo and Xu, Zhen and Xu, Ruotao and Wu, Si and Wong, Hau-San},
  title     = {Task-aware Cross-modal Feature Refinement Transformer with Large Language Models for Visual Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3931--3941},
}
PatchDEMUX: A Certifiably Robust Framework for Multi-label Classifiers Against Adversarial Patches: Dennis Jacob,

Chong Xiang,

Prateek Mittal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jacob_2025_CVPR,
  author    = {Jacob, Dennis and Xiang, Chong and Mittal, Prateek},
  title     = {{PatchDEMUX}: A Certifiably Robust Framework for Multi-label Classifiers Against Adversarial Patches},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9944--9953},
}
EgoTextVQA: Towards Egocentric Scene-Text Aware Video Question Answering: Sheng Zhou,

Junbin Xiao,

Qingyun Li,

Yicong Li,

Xun Yang,

Dan Guo,

Meng Wang,

Tat-Seng Chua,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhou_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Sheng and Xiao, Junbin and Li, Qingyun and Li, Yicong and Yang, Xun and Guo, Dan and Wang, Meng and Chua, Tat-Seng and Yao, Angela},
  title     = {{EgoTextVQA}: Towards Egocentric Scene-Text Aware Video Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3363--3373},
}
Token Cropr: Faster ViTs for Quite a Few Tasks: Benjamin Bergner,

Christoph Lippert,

Aravindh Mahendran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bergner_2025_CVPR,
  author    = {Bergner, Benjamin and Lippert, Christoph and Mahendran, Aravindh},
  title     = {Token {Cropr}: Faster {ViTs} for Quite a Few Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9740--9750},
}
STCOcc: Sparse Spatial-Temporal Cascade Renovation for 3D Occupancy and Scene Flow Prediction: Zhimin Liao,

Ping Wei,

Shuaijia Chen,

Haoxuan Wang,

Ziyang Ren; [pdf] [arXiv]
[bibtex]
@InProceedings{Liao_2025_CVPR,
  author    = {Liao, Zhimin and Wei, Ping and Chen, Shuaijia and Wang, Haoxuan and Ren, Ziyang},
  title     = {{STCOcc}: Sparse Spatial-Temporal Cascade Renovation for {3D} Occupancy and Scene Flow Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1516--1526},
}
Resilient Sensor Fusion Under Adverse Sensor Failures via Multi-Modal Expert Fusion: Konyul Park,

Yecheol Kim,

Daehun Kim,

Jun Won Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Konyul and Kim, Yecheol and Kim, Daehun and Choi, Jun Won},
  title     = {Resilient Sensor Fusion Under Adverse Sensor Failures via Multi-Modal Expert Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6720--6729},
}
MambaVO: Deep Visual Odometry Based on Sequential Matching Refinement and Training Smoothing: Shuo Wang,

Wanting Li,

Yongcai Wang,

Zhaoxin Fan,

Zhe Huang,

Xudong Cai,

Jian Zhao,

Deying Li; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Wang_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shuo and Li, Wanting and Wang, Yongcai and Fan, Zhaoxin and Huang, Zhe and Cai, Xudong and Zhao, Jian and Li, Deying},
  title     = {{MambaVO}: Deep Visual Odometry Based on Sequential Matching Refinement and Training Smoothing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1252--1262},
}
IndoorGS: Geometric Cues Guided Gaussian Splatting for Indoor Scene Reconstruction: Cong Ruan,

Yuesong Wang,

Tao Guan,

Bin Zhang,

Lili Ju; [pdf] [supp]
[bibtex]
@InProceedings{Ruan_2025_CVPR,
  author    = {Ruan, Cong and Wang, Yuesong and Guan, Tao and Zhang, Bin and Ju, Lili},
  title     = {{IndoorGS}: Geometric Cues Guided {Gaussian} Splatting for Indoor Scene Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {844--853},
}
Point-Cache: Test-time Dynamic and Hierarchical Cache for Robust and Generalizable Point Cloud Analysis: Hongyu Sun,

Qiuhong Ke,

Ming Cheng,

Yongcai Wang,

Deying Li,

Chenhui Gou,

Jianfei Cai; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Hongyu and Ke, Qiuhong and Cheng, Ming and Wang, Yongcai and Li, Deying and Gou, Chenhui and Cai, Jianfei},
  title     = {{Point-Cache}: Test-time Dynamic and Hierarchical Cache for Robust and Generalizable Point Cloud Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1263--1275},
}
Stop Walking in Circles! Bailing Out Early in Projected Gradient Descent: Philip Doldo,

Derek Everett,

Amol Khanna,

Andre T Nguyen,

Edward Raff; [pdf] [arXiv]
[bibtex]
@InProceedings{Doldo_2025_CVPR,
  author    = {Doldo, Philip and Everett, Derek and Khanna, Amol and Nguyen, Andre T and Raff, Edward},
  title     = {Stop Walking in Circles! {Bailing} Out Early in Projected Gradient Descent},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6373--6382},
}
MoManipVLA: Transferring Vision-language-action Models for General Mobile Manipulation: Zhenyu Wu,

Yuheng Zhou,

Xiuwei Xu,

Ziwei Wang,

Haibin Yan; [pdf] [arXiv]
[bibtex]
% NOTE(review): citation key Wu_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Zhenyu and Zhou, Yuheng and Xu, Xiuwei and Wang, Ziwei and Yan, Haibin},
  title     = {{MoManipVLA}: Transferring Vision-language-action Models for General Mobile Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1714--1723},
}
Stable-SCore: A Stable Registration-based Framework for 3D Shape Correspondence: Haolin Liu,

Xiaohang Zhan,

Zizheng Yan,

Zhongjin Luo,

Yuxin Wen,

Xiaoguang Han; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Liu_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Haolin and Zhan, Xiaohang and Yan, Zizheng and Luo, Zhongjin and Wen, Yuxin and Han, Xiaoguang},
  title     = {{Stable-SCore}: A Stable Registration-based Framework for {3D} Shape Correspondence},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {917--928},
}
Beyond Single-Modal Boundary: Cross-Modal Anomaly Detection through Visual Prototype and Harmonization: Kai Mao,

Ping Wei,

Yiyang Lian,

Yangyang Wang,

Nanning Zheng; [pdf]
[bibtex]
@InProceedings{Mao_2025_CVPR,
  author    = {Mao, Kai and Wei, Ping and Lian, Yiyang and Wang, Yangyang and Zheng, Nanning},
  title     = {Beyond Single-Modal Boundary: Cross-Modal Anomaly Detection through Visual Prototype and Harmonization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9964--9973},
}
Align-KD: Distilling Cross-Modal Alignment Knowledge for Mobile Vision-Language Large Model Enhancement: Qianhan Feng,

Wenshuo Li,

Tong Lin,

Xinghao Chen; [pdf]
[bibtex]
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Qianhan and Li, Wenshuo and Lin, Tong and Chen, Xinghao},
  title     = {{Align-KD}: Distilling Cross-Modal Alignment Knowledge for Mobile Vision-Language Large Model Enhancement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4178--4188},
}
Pose Priors from Language Models: Sanjay Subramanian,

Evonne Ng,

Lea Müller,

Dan Klein,

Shiry Ginosar,

Trevor Darrell; [pdf] [supp]
[bibtex]
@InProceedings{Subramanian_2025_CVPR,
  author    = {Subramanian, Sanjay and Ng, Evonne and M{\"u}ller, Lea and Klein, Dan and Ginosar, Shiry and Darrell, Trevor},
  title     = {Pose Priors from Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7125--7135},
}
LogoSP: Local-global Grouping of Superpoints for Unsupervised Semantic Segmentation of 3D Point Clouds: Zihui Zhang,

Weisheng Dai,

Hongtao Wen,

Bo Yang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhang_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zihui and Dai, Weisheng and Wen, Hongtao and Yang, Bo},
  title     = {{LogoSP}: Local-global Grouping of Superpoints for Unsupervised Semantic Segmentation of {3D} Point Clouds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1374--1384},
}
Exploring Intrinsic Normal Prototypes within a Single Image for Universal Anomaly Detection: Wei Luo,

Yunkang Cao,

Haiming Yao,

Xiaotian Zhang,

Jianan Lou,

Yuqi Cheng,

Weiming Shen,

Wenyong Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Wei and Cao, Yunkang and Yao, Haiming and Zhang, Xiaotian and Lou, Jianan and Cheng, Yuqi and Shen, Weiming and Yu, Wenyong},
  title     = {Exploring Intrinsic Normal Prototypes within a Single Image for Universal Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9974--9983},
}
Augmenting Perceptual Super-Resolution via Image Quality Predictors: Fengjia Zhang,

Samrudhdhi B. Rangrej,

Tristan Aumentado-Armstrong,

Afsaneh Fazly,

Alex Levinshtein; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhang_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Fengjia and Rangrej, Samrudhdhi B. and Aumentado-Armstrong, Tristan and Fazly, Afsaneh and Levinshtein, Alex},
  title     = {Augmenting Perceptual Super-Resolution via Image Quality Predictors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2311--2322},
}
TurboFill: Adapting Few-step Text-to-image Model for Fast Image Inpainting: Liangbin Xie,

Daniil Pakhomov,

Zhonghao Wang,

Zongze Wu,

Ziyan Chen,

Yuqian Zhou,

Haitian Zheng,

Zhifei Zhang,

Zhe Lin,

Jiantao Zhou,

Chao Dong; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Xie_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Liangbin and Pakhomov, Daniil and Wang, Zhonghao and Wu, Zongze and Chen, Ziyan and Zhou, Yuqian and Zheng, Haitian and Zhang, Zhifei and Lin, Zhe and Zhou, Jiantao and Dong, Chao},
  title     = {{TurboFill}: Adapting Few-step Text-to-image Model for Fast Image Inpainting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7613--7622},
}
Stochastic Human Motion Prediction with Memory of Action Transition and Action Characteristic: Jianwei Tang,

Hong Yang,

Tengyue Chen,

Jian-Fang Hu; [pdf] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Jianwei and Yang, Hong and Chen, Tengyue and Hu, Jian-Fang},
  title     = {Stochastic Human Motion Prediction with Memory of Action Transition and Action Characteristic},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1883--1893},
}
Perception Tokens Enhance Visual Reasoning in Multimodal Language Models: Mahtab Bigverdi,

Zelun Luo,

Cheng-Yu Hsieh,

Ethan Shen,

Dongping Chen,

Linda G. Shapiro,

Ranjay Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bigverdi_2025_CVPR,
  author    = {Bigverdi, Mahtab and Luo, Zelun and Hsieh, Cheng-Yu and Shen, Ethan and Chen, Dongping and Shapiro, Linda G. and Krishna, Ranjay},
  title     = {Perception Tokens Enhance Visual Reasoning in Multimodal Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3836--3845},
}
X-Dyna: Expressive Dynamic Human Image Animation: Di Chang,

Hongyi Xu,

You Xie,

Yipeng Gao,

Zhengfei Kuang,

Shengqu Cai,

Chenxu Zhang,

Guoxian Song,

Chao Wang,

Yichun Shi,

Zeyuan Chen,

Shijie Zhou,

Linjie Luo,

Gordon Wetzstein,

Mohammad Soleymani; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2025_CVPR,
  author    = {Chang, Di and Xu, Hongyi and Xie, You and Gao, Yipeng and Kuang, Zhengfei and Cai, Shengqu and Zhang, Chenxu and Song, Guoxian and Wang, Chao and Shi, Yichun and Chen, Zeyuan and Zhou, Shijie and Luo, Linjie and Wetzstein, Gordon and Soleymani, Mohammad},
  title     = {{X-Dyna}: Expressive Dynamic Human Image Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5499--5509},
}
Towards Effective and Sparse Adversarial Attack on Spiking Neural Networks via Breaking Invisible Surrogate Gradients: Li Lun,

Kunyu Feng,

Qinglong Ni,

Ling Liang,

Yuan Wang,

Ying Li,

Dunshan Yu,

Xiaoxin Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lun_2025_CVPR,
  author    = {Lun, Li and Feng, Kunyu and Ni, Qinglong and Liang, Ling and Wang, Yuan and Li, Ying and Yu, Dunshan and Cui, Xiaoxin},
  title     = {Towards Effective and Sparse Adversarial Attack on Spiking Neural Networks via Breaking Invisible Surrogate Gradients},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3540--3551},
}
Towards Understanding and Quantifying Uncertainty for Text-to-Image Generation: Gianni Franchi,

Nacim Belkhir,

Dat Nguyen Trong,

Guoxuan Xia,

Andrea Pilzer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Franchi_2025_CVPR,
  author    = {Franchi, Gianni and Belkhir, Nacim and Trong, Dat Nguyen and Xia, Guoxuan and Pilzer, Andrea},
  title     = {Towards Understanding and Quantifying Uncertainty for Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8062--8072},
}
LLaVA-ST: A Multimodal Large Language Model for Fine-Grained Spatial-Temporal Understanding: Hongyu Li,

Jinyu Chen,

Ziyu Wei,

Shaofei Huang,

Tianrui Hui,

Jialin Gao,

Xiaoming Wei,

Si Liu; [pdf]
[bibtex]
% NOTE(review): citation key Li_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Hongyu and Chen, Jinyu and Wei, Ziyu and Huang, Shaofei and Hui, Tianrui and Gao, Jialin and Wei, Xiaoming and Liu, Si},
  title     = {{LLaVA-ST}: A Multimodal Large Language Model for Fine-Grained Spatial-Temporal Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8592--8603},
}
Insight-V: Exploring Long-Chain Visual Reasoning with Multimodal Large Language Models: Yuhao Dong,

Zuyan Liu,

Hai-Long Sun,

Jingkang Yang,

Winston Hu,

Yongming Rao,

Ziwei Liu; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Yuhao and Liu, Zuyan and Sun, Hai-Long and Yang, Jingkang and Hu, Winston and Rao, Yongming and Liu, Ziwei},
  title     = {{Insight-V}: Exploring Long-Chain Visual Reasoning with Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9062--9072},
}
MaIR: A Locality- and Continuity-Preserving Mamba for Image Restoration: Boyun Li,

Haiyu Zhao,

Wenxin Wang,

Peng Hu,

Yuanbiao Gou,

Xi Peng; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Li_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Boyun and Zhao, Haiyu and Wang, Wenxin and Hu, Peng and Gou, Yuanbiao and Peng, Xi},
  title     = {{MaIR}: A Locality- and Continuity-Preserving {Mamba} for Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7491--7501},
}
RSAR: Restricted State Angle Resolver and Rotated SAR Benchmark: Xin Zhang,

Xue Yang,

Yuxuan Li,

Jian Yang,

Ming-Ming Cheng,

Xiang Li; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhang_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xin and Yang, Xue and Li, Yuxuan and Yang, Jian and Cheng, Ming-Ming and Li, Xiang},
  title     = {{RSAR}: Restricted State Angle Resolver and Rotated {SAR} Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7416--7426},
}
Continuous Space-Time Video Resampling with Invertible Motion Steganography: Yuantong Zhang,

Zhenzhong Chen; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Zhang_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yuantong and Chen, Zhenzhong},
  title     = {Continuous Space-Time Video Resampling with Invertible Motion Steganography},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2116--2126},
}
ProtoDepth: Unsupervised Continual Depth Completion with Prototypes: Patrick Rim,

Hyoungseob Park,

S. Gangopadhyay,

Ziyao Zeng,

Younjoon Chung,

Alex Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rim_2025_CVPR,
  author    = {Rim, Patrick and Park, Hyoungseob and Gangopadhyay, S. and Zeng, Ziyao and Chung, Younjoon and Wong, Alex},
  title     = {{ProtoDepth}: Unsupervised Continual Depth Completion with Prototypes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6304--6316},
}
ParaHome: Parameterizing Everyday Home Activities Towards 3D Generative Modeling of Human-Object Interactions: Jeonghwan Kim,

Jisoo Kim,

Jeonghyeon Na,

Hanbyul Joo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Jeonghwan and Kim, Jisoo and Na, Jeonghyeon and Joo, Hanbyul},
  title     = {{ParaHome}: Parameterizing Everyday Home Activities Towards {3D} Generative Modeling of Human-Object Interactions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1816--1828},
}
Adapting to the Unknown: Training-Free Audio-Visual Event Perception with Dynamic Thresholds: Eitan Shaar,

Ariel Shaulov,

Gal Chechik,

Lior Wolf; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shaar_2025_CVPR,
  author    = {Shaar, Eitan and Shaulov, Ariel and Chechik, Gal and Wolf, Lior},
  title     = {Adapting to the Unknown: Training-Free Audio-Visual Event Perception with Dynamic Thresholds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3142--3151},
}
OpenHumanVid: A Large-Scale High-Quality Dataset for Enhancing Human-Centric Video Generation: Hui Li,

Mingwang Xu,

Yun Zhan,

Shan Mu,

Jiaye Li,

Kaihui Cheng,

Yuxuan Chen,

Tan Chen,

Mao Ye,

Jingdong Wang,

Siyu Zhu; [pdf] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Hui and Xu, Mingwang and Zhan, Yun and Mu, Shan and Li, Jiaye and Cheng, Kaihui and Chen, Yuxuan and Chen, Tan and Ye, Mao and Wang, Jingdong and Zhu, Siyu},
  title     = {{OpenHumanVid}: A Large-Scale High-Quality Dataset for Enhancing Human-Centric Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7752--7762},
}
Track Any Anomalous Object: A Granular Video Anomaly Detection Pipeline: Yuzhi Huang,

Chenxin Li,

Haitao Zhang,

Zixu Lin,

Yunlong Lin,

Hengyu Liu,

Wuyang Li,

Xinyu Liu,

Jiechao Gao,

Yue Huang,

Xinghao Ding,

Yixuan Yuan; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Huang_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Yuzhi and Li, Chenxin and Zhang, Haitao and Lin, Zixu and Lin, Yunlong and Liu, Hengyu and Li, Wuyang and Liu, Xinyu and Gao, Jiechao and Huang, Yue and Ding, Xinghao and Yuan, Yixuan},
  title     = {Track Any Anomalous Object: A Granular Video Anomaly Detection Pipeline},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8689--8699},
}
Object-aware Sound Source Localization via Audio-Visual Scene Understanding: Sung Jin Um,

Dongjin Kim,

Sangmin Lee,

Jung Uk Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Um_2025_CVPR,
  author    = {Um, Sung Jin and Kim, Dongjin and Lee, Sangmin and Kim, Jung Uk},
  title     = {Object-aware Sound Source Localization via Audio-Visual Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8342--8351},
}
SerialGen: Personalized Image Generation by First Standardization Then Personalization: Cong Xie,

Han Zou,

Ruiqi Yu,

Yan Zhang,

Zhenpeng Zhan; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Xie_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Cong and Zou, Han and Yu, Ruiqi and Zhang, Yan and Zhan, Zhenpeng},
  title     = {{SerialGen}: Personalized Image Generation by First Standardization Then Personalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2847--2856},
}
Augmented Deep Contexts for Spatially Embedded Video Coding: Yifan Bian,

Chuanbo Tang,

Li Li,

Dong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bian_2025_CVPR,
  author    = {Bian, Yifan and Tang, Chuanbo and Li, Li and Liu, Dong},
  title     = {Augmented Deep Contexts for Spatially Embedded Video Coding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2094--2104},
}
Proximal Algorithm Unrolling: Flexible and Efficient Reconstruction Networks for Single-Pixel Imaging: Ping Wang,

Lishun Wang,

Gang Qu,

Xiaodong Wang,

Yulun Zhang,

Xin Yuan; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Wang_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Ping and Wang, Lishun and Qu, Gang and Wang, Xiaodong and Zhang, Yulun and Yuan, Xin},
  title     = {Proximal Algorithm Unrolling: Flexible and Efficient Reconstruction Networks for Single-Pixel Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {411--421},
}
Image Quality Assessment: From Human to Machine Preference: Chunyi Li,

Yuan Tian,

Xiaoyue Ling,

Zicheng Zhang,

Haodong Duan,

Haoning Wu,

Ziheng Jia,

Xiaohong Liu,

Xiongkuo Min,

Guo Lu,

Weisi Lin,

Guangtao Zhai; [pdf] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2025_CVPR is shared by more than one entry in this listing; disambiguate on import.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Chunyi and Tian, Yuan and Ling, Xiaoyue and Zhang, Zicheng and Duan, Haodong and Wu, Haoning and Jia, Ziheng and Liu, Xiaohong and Min, Xiongkuo and Lu, Guo and Lin, Weisi and Zhai, Guangtao},
  title     = {Image Quality Assessment: From Human to Machine Preference},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7570--7581},
}
Context-Aware Multimodal Pretraining: Karsten Roth,

Zeynep Akata,

Dima Damen,

Ivana Balazevic,

Olivier J. Henaff; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Roth_2025_CVPR,
  author    = {Roth, Karsten and Akata, Zeynep and Damen, Dima and Balazevic, Ivana and Henaff, Olivier J.},
  title     = {Context-Aware Multimodal Pretraining},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4267--4279},
}
Task-driven Image Fusion with Learnable Fusion Loss: Haowen Bai,

Jiangshe Zhang,

Zixiang Zhao,

Yichen Wu,

Lilun Deng,

Yukun Cui,

Tao Feng,

Shuang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2025_CVPR, author = {Bai, Haowen and Zhang, Jiangshe and Zhao, Zixiang and Wu, Yichen and Deng, Lilun and Cui, Yukun and Feng, Tao and Xu, Shuang}, title = {Task-driven Image Fusion with Learnable Fusion Loss}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7457-7468} }
LamRA: Large Multimodal Model as Your Advanced Retrieval Assistant: Yikun Liu,

Yajie Zhang,

Jiayin Cai,

Xiaolong Jiang,

Yao Hu,

Jiangchao Yao,

Yanfeng Wang,

Weidi Xie; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Yikun and Zhang, Yajie and Cai, Jiayin and Jiang, Xiaolong and Hu, Yao and Yao, Jiangchao and Wang, Yanfeng and Xie, Weidi}, title = {LamRA: Large Multimodal Model as Your Advanced Retrieval Assistant}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4015-4025} }
CoMM: A Coherent Interleaved Image-Text Dataset for Multimodal Understanding and Generation: Wei Chen,

Lin Li,

Yongqi Yang,

Bin Wen,

Fan Yang,

Tingting Gao,

Yu Wu,

Long Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR, author = {Chen, Wei and Li, Lin and Yang, Yongqi and Wen, Bin and Yang, Fan and Gao, Tingting and Wu, Yu and Chen, Long}, title = {CoMM: A Coherent Interleaved Image-Text Dataset for Multimodal Understanding and Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8073-8082} }
MoGe: Unlocking Accurate Monocular Geometry Estimation for Open-Domain Images with Optimal Training Supervision: Ruicheng Wang,

Sicheng Xu,

Cassie Dai,

Jianfeng Xiang,

Yu Deng,

Xin Tong,

Jiaolong Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Ruicheng and Xu, Sicheng and Dai, Cassie and Xiang, Jianfeng and Deng, Yu and Tong, Xin and Yang, Jiaolong}, title = {MoGe: Unlocking Accurate Monocular Geometry Estimation for Open-Domain Images with Optimal Training Supervision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5261-5271} }
Beyond Background Shift: Rethinking Instance Replay in Continual Semantic Segmentation: Hongmei Yin,

Tingliang Feng,

Fan Lyu,

Fanhua Shang,

Hongying Liu,

Wei Feng,

Liang Wan; [pdf] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR, author = {Yin, Hongmei and Feng, Tingliang and Lyu, Fan and Shang, Fanhua and Liu, Hongying and Feng, Wei and Wan, Liang}, title = {Beyond Background Shift: Rethinking Instance Replay in Continual Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9839-9848} }
ScaleLSD: Scalable Deep Line Segment Detection Streamlined: Zeran Ke,

Bin Tan,

Xianwei Zheng,

Yujun Shen,

Tianfu Wu,

Nan Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2025_CVPR, author = {Ke, Zeran and Tan, Bin and Zheng, Xianwei and Shen, Yujun and Wu, Tianfu and Xue, Nan}, title = {ScaleLSD: Scalable Deep Line Segment Detection Streamlined}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6327-6336} }
Revisiting MAE Pre-training for 3D Medical Image Segmentation: Tassilo Wald,

Constantin Ulrich,

Stanislav Lukyanenko,

Andrei Goncharov,

Alberto Paderno,

Maximilian Miller,

Leander Maerkisch,

Paul Jaeger,

Klaus Maier-Hein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wald_2025_CVPR, author = {Wald, Tassilo and Ulrich, Constantin and Lukyanenko, Stanislav and Goncharov, Andrei and Paderno, Alberto and Miller, Maximilian and Maerkisch, Leander and Jaeger, Paul and Maier-Hein, Klaus}, title = {Revisiting MAE Pre-training for 3D Medical Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5186-5196} }
ChatHuman: Chatting about 3D Humans with Tools: Jing Lin,

Yao Feng,

Weiyang Liu,

Michael J. Black; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR, author = {Lin, Jing and Feng, Yao and Liu, Weiyang and Black, Michael J.}, title = {ChatHuman: Chatting about 3D Humans with Tools}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8150-8161} }
Scalable Autoregressive Monocular Depth Estimation: Jinhong Wang,

Jian Liu,

Dongqi Tang,

Weiqiang Wang,

Wentong Li,

Danny Chen,

Jintai Chen,

Jian Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Jinhong and Liu, Jian and Tang, Dongqi and Wang, Weiqiang and Li, Wentong and Chen, Danny and Chen, Jintai and Wu, Jian}, title = {Scalable Autoregressive Monocular Depth Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6262-6272} }
Recurrence-Enhanced Vision-and-Language Transformers for Robust Multimodal Document Retrieval: Davide Caffagni,

Sara Sarto,

Marcella Cornia,

Lorenzo Baraldi,

Rita Cucchiara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Caffagni_2025_CVPR, author = {Caffagni, Davide and Sarto, Sara and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita}, title = {Recurrence-Enhanced Vision-and-Language Transformers for Robust Multimodal Document Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9286-9295} }
Camouflage Anything: Learning to Hide using Controlled Out-painting and Representation Engineering: Biplab Das,

Viswanath Gopalakrishnan; [pdf] [supp]
[bibtex]
@InProceedings{Das_2025_CVPR, author = {Das, Biplab and Gopalakrishnan, Viswanath}, title = {Camouflage Anything: Learning to Hide using Controlled Out-painting and Representation Engineering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3603-3613} }
Test-Time Fine-Tuning of Image Compression Models for Multi-Task Adaptability: Unki Park,

Seongmoon Jeong,

Youngchan Jang,

Gyeong-Moon Park,

Jong Hwan Ko; [pdf] [supp]
[bibtex]
@InProceedings{Park_2025_CVPR, author = {Park, Unki and Jeong, Seongmoon and Jang, Youngchan and Park, Gyeong-Moon and Ko, Jong Hwan}, title = {Test-Time Fine-Tuning of Image Compression Models for Multi-Task Adaptability}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4430-4440} }
DynPose: Largely Improving the Efficiency of Human Pose Estimation by a Simple Dynamic Framework: Yalong Xu,

Lin Zhao,

Chen Gong,

Guangyu Li,

Di Wang,

Nannan Wang; [pdf]
[bibtex]
@InProceedings{Xu_2025_CVPR, author = {Xu, Yalong and Zhao, Lin and Gong, Chen and Li, Guangyu and Wang, Di and Wang, Nannan}, title = {DynPose: Largely Improving the Efficiency of Human Pose Estimation by a Simple Dynamic Framework}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1160-1169} }
VideoTree: Adaptive Tree-based Video Representation for LLM Reasoning on Long Videos: Ziyang Wang,

Shoubin Yu,

Elias Stengel-Eskin,

Jaehong Yoon,

Feng Cheng,

Gedas Bertasius,

Mohit Bansal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Ziyang and Yu, Shoubin and Stengel-Eskin, Elias and Yoon, Jaehong and Cheng, Feng and Bertasius, Gedas and Bansal, Mohit}, title = {VideoTree: Adaptive Tree-based Video Representation for LLM Reasoning on Long Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3272-3283} }
Distinguish Then Exploit: Source-free Open Set Domain Adaptation via Weight Barcode Estimation and Sparse Label Assignment: Weiming Liu,

Jun Dan,

Fan Wang,

Xinting Liao,

Junhao Dong,

Hua Yu,

Shunjie Dong,

Lianyong Qi; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Weiming and Dan, Jun and Wang, Fan and Liao, Xinting and Dong, Junhao and Yu, Hua and Dong, Shunjie and Qi, Lianyong}, title = {Distinguish Then Exploit: Source-free Open Set Domain Adaptation via Weight Barcode Estimation and Sparse Label Assignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4927-4938} }
Multi-Sensor Object Anomaly Detection: Unifying Appearance, Geometry, and Internal Properties: Wenqiao Li,

Bozhong Zheng,

Xiaohao Xu,

Jinye Gan,

Fading Lu,

Xiang Li,

Na Ni,

Zheng Tian,

Xiaonan Huang,

Shenghua Gao,

Yingna Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Wenqiao and Zheng, Bozhong and Xu, Xiaohao and Gan, Jinye and Lu, Fading and Li, Xiang and Ni, Na and Tian, Zheng and Huang, Xiaonan and Gao, Shenghua and Wu, Yingna}, title = {Multi-Sensor Object Anomaly Detection: Unifying Appearance, Geometry, and Internal Properties}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9984-9993} }
Fancy123: One Image to High-Quality 3D Mesh Generation via Plug-and-Play Deformation: Qiao Yu,

Xianzhi Li,

Yuan Tang,

Xu Han,

Long Hu,

Yixue Hao,

Min Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR, author = {Yu, Qiao and Li, Xianzhi and Tang, Yuan and Han, Xu and Hu, Long and Hao, Yixue and Chen, Min}, title = {Fancy123: One Image to High-Quality 3D Mesh Generation via Plug-and-Play Deformation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {595-604} }
PlanarSplatting: Accurate Planar Surface Reconstruction in 3 Minutes: Bin Tan,

Rui Yu,

Yujun Shen,

Nan Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2025_CVPR, author = {Tan, Bin and Yu, Rui and Shen, Yujun and Xue, Nan}, title = {PlanarSplatting: Accurate Planar Surface Reconstruction in 3 Minutes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1190-1199} }
Omni-ID: Holistic Identity Representation Designed for Generative Tasks: Guocheng Qian,

Kuan-Chieh Wang,

Or Patashnik,

Negin Heravi,

Daniil Ostashev,

Sergey Tulyakov,

Daniel Cohen-Or,

Kfir Aberman; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2025_CVPR, author = {Qian, Guocheng and Wang, Kuan-Chieh and Patashnik, Or and Heravi, Negin and Ostashev, Daniil and Tulyakov, Sergey and Cohen-Or, Daniel and Aberman, Kfir}, title = {Omni-ID: Holistic Identity Representation Designed for Generative Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8786-8795} }
MIRE: Matched Implicit Neural Representations: Dhananjaya Jayasundara,

Heng Zhao,

Demetrio Labate,

Vishal M. Patel; [pdf] [supp]
[bibtex]
@InProceedings{Jayasundara_2025_CVPR, author = {Jayasundara, Dhananjaya and Zhao, Heng and Labate, Demetrio and Patel, Vishal M.}, title = {MIRE: Matched Implicit Neural Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8279-8288} }
AeSPa : Attention-guided Self-supervised Parallel Imaging for MRI Reconstruction: Jinho Joo,

Hyeseong Kim,

Hyeyeon Won,

Deukhee Lee,

Taejoon Eo,

Dosik Hwang; [pdf] [supp]
[bibtex]
@InProceedings{Joo_2025_CVPR, author = {Joo, Jinho and Kim, Hyeseong and Won, Hyeyeon and Lee, Deukhee and Eo, Taejoon and Hwang, Dosik}, title = {AeSPa : Attention-guided Self-supervised Parallel Imaging for MRI Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5217-5226} }
RobSense: A Robust Multi-modal Foundation Model for Remote Sensing with Static, Temporal, and Incomplete Data Adaptability: Minh Kha Do,

Kang Han,

Phu Lai,

Khoa T. Phan,

Wei Xiang; [pdf]
[bibtex]
@InProceedings{Do_2025_CVPR, author = {Do, Minh Kha and Han, Kang and Lai, Phu and Phan, Khoa T. and Xiang, Wei}, title = {RobSense: A Robust Multi-modal Foundation Model for Remote Sensing with Static, Temporal, and Incomplete Data Adaptability}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7427-7436} }
MAC-Ego3D: Multi-Agent Gaussian Consensus for Real-Time Collaborative Ego-Motion and Photorealistic 3D Reconstruction: Xiaohao Xu,

Feng Xue,

Shibo Zhao,

Yike Pan,

Sebastian Scherer,

Xiaonan Huang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR, author = {Xu, Xiaohao and Xue, Feng and Zhao, Shibo and Pan, Yike and Scherer, Sebastian and Huang, Xiaonan}, title = {MAC-Ego3D: Multi-Agent Gaussian Consensus for Real-Time Collaborative Ego-Motion and Photorealistic 3D Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {854-863} }
Online Video Understanding: OVBench and VideoChat-Online: Zhenpeng Huang,

Xinhao Li,

Jiaqi Li,

Jing Wang,

Xiangyu Zeng,

Cheng Liang,

Tao Wu,

Xi Chen,

Liang Li,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR, author = {Huang, Zhenpeng and Li, Xinhao and Li, Jiaqi and Wang, Jing and Zeng, Xiangyu and Liang, Cheng and Wu, Tao and Chen, Xi and Li, Liang and Wang, Limin}, title = {Online Video Understanding: OVBench and VideoChat-Online}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3328-3338} }
LightLoc: Learning Outdoor LiDAR Localization at Light Speed: Wen Li,

Chen Liu,

Shangshu Yu,

Dunqiang Liu,

Yin Zhou,

Siqi Shen,

Chenglu Wen,

Cheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Wen and Liu, Chen and Yu, Shangshu and Liu, Dunqiang and Zhou, Yin and Shen, Siqi and Wen, Chenglu and Wang, Cheng}, title = {LightLoc: Learning Outdoor LiDAR Localization at Light Speed}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6680-6689} }
Accurate Differential Operators for Hybrid Neural Fields: Aditya Chetan,

Guandao Yang,

Zichen Wang,

Steve Marschner,

Bharath Hariharan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chetan_2025_CVPR, author = {Chetan, Aditya and Yang, Guandao and Wang, Zichen and Marschner, Steve and Hariharan, Bharath}, title = {Accurate Differential Operators for Hybrid Neural Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {530-539} }
FeedEdit: Text-Based Image Editing with Dynamic Feedback Regulation: Fengyi Fu,

Lei Zhang,

Mengqi Huang,

Zhendong Mao; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2025_CVPR, author = {Fu, Fengyi and Zhang, Lei and Huang, Mengqi and Mao, Zhendong}, title = {FeedEdit: Text-Based Image Editing with Dynamic Feedback Regulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2661-2670} }
Classifier-guided CLIP Distillation for Unsupervised Multi-label Classification: Dongseob Kim,

Hyunjung Shim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR, author = {Kim, Dongseob and Shim, Hyunjung}, title = {Classifier-guided CLIP Distillation for Unsupervised Multi-label Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4661-4671} }
UMotion: Uncertainty-driven Human Motion Estimation from Inertial and Ultra-wideband Units: Huakun Liu,

Hiroki Ota,

Xin Wei,

Yutaro Hirao,

Monica Perusquia-Hernandez,

Hideaki Uchiyama,

Kiyoshi Kiyokawa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Huakun and Ota, Hiroki and Wei, Xin and Hirao, Yutaro and Perusquia-Hernandez, Monica and Uchiyama, Hideaki and Kiyokawa, Kiyoshi}, title = {UMotion: Uncertainty-driven Human Motion Estimation from Inertial and Ultra-wideband Units}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7085-7094} }
Scene Map-based Prompt Tuning for Navigation Instruction Generation: Sheng Fan,

Rui Liu,

Wenguan Wang,

Yi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2025_CVPR, author = {Fan, Sheng and Liu, Rui and Wang, Wenguan and Yang, Yi}, title = {Scene Map-based Prompt Tuning for Navigation Instruction Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6898-6908} }
DropoutGS: Dropping Out Gaussians for Better Sparse-view Rendering: Yexing Xu,

Longguang Wang,

Minglin Chen,

Sheng Ao,

Li Li,

Yulan Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR, author = {Xu, Yexing and Wang, Longguang and Chen, Minglin and Ao, Sheng and Li, Li and Guo, Yulan}, title = {DropoutGS: Dropping Out Gaussians for Better Sparse-view Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {701-710} }
Bridging Modalities: Improving Universal Multimodal Retrieval by Multimodal Large Language Models: Xin Zhang,

Yanzhao Zhang,

Wen Xie,

Mingxin Li,

Ziqi Dai,

Dingkun Long,

Pengjun Xie,

Meishan Zhang,

Wenjie Li,

Min Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xin and Zhang, Yanzhao and Xie, Wen and Li, Mingxin and Dai, Ziqi and Long, Dingkun and Xie, Pengjun and Zhang, Meishan and Li, Wenjie and Zhang, Min}, title = {Bridging Modalities: Improving Universal Multimodal Retrieval by Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9274-9285} }
Enhancing Dataset Distillation via Non-Critical Region Refinement: Minh-Tuan Tran,

Trung Le,

Xuan-May Le,

Thanh-Toan Do,

Dinh Phung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2025_CVPR, author = {Tran, Minh-Tuan and Le, Trung and Le, Xuan-May and Do, Thanh-Toan and Phung, Dinh}, title = {Enhancing Dataset Distillation via Non-Critical Region Refinement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10015-10024} }
PUP 3D-GS: Principled Uncertainty Pruning for 3D Gaussian Splatting: Alex Hanson,

Allen Tu,

Vasu Singla,

Mayuka Jayawardhana,

Matthias Zwicker,

Tom Goldstein; [pdf] [supp]
[bibtex]
@InProceedings{Hanson_2025_CVPR, author = {Hanson, Alex and Tu, Allen and Singla, Vasu and Jayawardhana, Mayuka and Zwicker, Matthias and Goldstein, Tom}, title = {PUP 3D-GS: Principled Uncertainty Pruning for 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5949-5958} }
ScribbleLight: Single Image Indoor Relighting with Scribbles: Jun Myeong Choi,

Annie Wang,

Pieter Peers,

Anand Bhattad,

Roni Sengupta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2025_CVPR, author = {Choi, Jun Myeong and Wang, Annie and Peers, Pieter and Bhattad, Anand and Sengupta, Roni}, title = {ScribbleLight: Single Image Indoor Relighting with Scribbles}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5720-5731} }
InsightEdit: Towards Better Instruction Following for Image Editing: Yingjing Xu,

Jie Kong,

Jiazhi Wang,

Xiao Pan,

Bo Lin,

Qiang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR, author = {Xu, Yingjing and Kong, Jie and Wang, Jiazhi and Pan, Xiao and Lin, Bo and Liu, Qiang}, title = {InsightEdit: Towards Better Instruction Following for Image Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2694-2703} }
One-for-More: Continual Diffusion Model for Anomaly Detection: Xiaofan Li,

Xin Tan,

Zhuo Chen,

Zhizhong Zhang,

Ruixin Zhang,

Rizen Guo,

Guanna Jiang,

Yulong Chen,

Yanyun Qu,

Lizhuang Ma,

Yuan Xie; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Xiaofan and Tan, Xin and Chen, Zhuo and Zhang, Zhizhong and Zhang, Ruixin and Guo, Rizen and Jiang, Guanna and Chen, Yulong and Qu, Yanyun and Ma, Lizhuang and Xie, Yuan}, title = {One-for-More: Continual Diffusion Model for Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4766-4775} }
Omni-RGPT: Unifying Image and Video Region-level Understanding via Token Marks: Miran Heo,

Min-Hung Chen,

De-An Huang,

Sifei Liu,

Subhashree Radhakrishnan,

Seon Joo Kim,

Yu-Chiang Frank Wang,

Ryo Hachiuma; [pdf] [supp]
[bibtex]
@InProceedings{Heo_2025_CVPR, author = {Heo, Miran and Chen, Min-Hung and Huang, De-An and Liu, Sifei and Radhakrishnan, Subhashree and Kim, Seon Joo and Wang, Yu-Chiang Frank and Hachiuma, Ryo}, title = {Omni-RGPT: Unifying Image and Video Region-level Understanding via Token Marks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3919-3930} }
EDM: Equirectangular Projection-Oriented Dense Kernelized Feature Matching: Dongki Jung,

Jaehoon Choi,

Yonghan Lee,

Somi Jeong,

Taejae Lee,

Dinesh Manocha,

Suyong Yeon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2025_CVPR, author = {Jung, Dongki and Choi, Jaehoon and Lee, Yonghan and Jeong, Somi and Lee, Taejae and Manocha, Dinesh and Yeon, Suyong}, title = {EDM: Equirectangular Projection-Oriented Dense Kernelized Feature Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6337-6347} }
EZSR: Event-based Zero-Shot Recognition: Yan Yang,

Liyuan Pan,

Dongxu Li,

Liu Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR, author = {Yang, Yan and Pan, Liyuan and Li, Dongxu and Liu, Liu}, title = {EZSR: Event-based Zero-Shot Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4628-4638} }
SVFR: A Unified Framework for Generalized Video Face Restoration: Zhiyao Wang,

Xu Chen,

Chengming Xu,

Junwei Zhu,

Xiaobin Hu,

Jiangning Zhang,

Chengjie Wang,

Yuqi Liu,

Yiyi Zhou,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Zhiyao and Chen, Xu and Xu, Chengming and Zhu, Junwei and Hu, Xiaobin and Zhang, Jiangning and Wang, Chengjie and Liu, Yuqi and Zhou, Yiyi and Ji, Rongrong}, title = {SVFR: A Unified Framework for Generalized Video Face Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7406-7415} }
Decoupling Fine Detail and Global Geometry for Compressed Depth Map Super-Resolution: Huan Zheng,

Wencheng Han,

Jianbing Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_CVPR, author = {Zheng, Huan and Han, Wencheng and Shen, Jianbing}, title = {Decoupling Fine Detail and Global Geometry for Compressed Depth Map Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {951-960} }
Digital Twin Catalog: A Large-Scale Photorealistic 3D Object Digital Twin Dataset: Zhao Dong,

Ka Chen,

Zhaoyang Lv,

Hong-Xing Yu,

Yunzhi Zhang,

Cheng Zhang,

Yufeng Zhu,

Stephen Tian,

Zhengqin Li,

Geordie Moffatt,

Sean Christofferson,

James Fort,

Xiaqing Pan,

Mingfei Yan,

Jiajun Wu,

Carl Yuheng Ren,

Richard Newcombe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_CVPR, author = {Dong, Zhao and Chen, Ka and Lv, Zhaoyang and Yu, Hong-Xing and Zhang, Yunzhi and Zhang, Cheng and Zhu, Yufeng and Tian, Stephen and Li, Zhengqin and Moffatt, Geordie and Christofferson, Sean and Fort, James and Pan, Xiaqing and Yan, Mingfei and Wu, Jiajun and Ren, Carl Yuheng and Newcombe, Richard}, title = {Digital Twin Catalog: A Large-Scale Photorealistic 3D Object Digital Twin Dataset}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {753-763} }
MeshGen: Generating PBR Textured Mesh with Render-Enhanced Auto-Encoder and Generative Data Augmentation: Zilong Chen,

Yikai Wang,

Wenqiang Sun,

Feng Wang,

Yiwen Chen,

Huaping Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR, author = {Chen, Zilong and Wang, Yikai and Sun, Wenqiang and Wang, Feng and Chen, Yiwen and Liu, Huaping}, title = {MeshGen: Generating PBR Textured Mesh with Render-Enhanced Auto-Encoder and Generative Data Augmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5835-5848} }
DeClotH: Decomposable 3D Cloth and Human Body Reconstruction from a Single Image: Hyeongjin Nam,

Donghwan Kim,

Jeongtaek Oh,

Kyoung Mu Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2025_CVPR, author = {Nam, Hyeongjin and Kim, Donghwan and Oh, Jeongtaek and Lee, Kyoung Mu}, title = {DeClotH: Decomposable 3D Cloth and Human Body Reconstruction from a Single Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5636-5645} }
Neuron: Learning Context-Aware Evolving Representations for Zero-Shot Skeleton Action Recognition: Yang Chen,

Jingcai Guo,

Song Guo,

Dacheng Tao; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR, author = {Chen, Yang and Guo, Jingcai and Guo, Song and Tao, Dacheng}, title = {Neuron: Learning Context-Aware Evolving Representations for Zero-Shot Skeleton Action Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8721-8730} }
High-Fidelity Relightable Monocular Portrait Animation with Lighting-Controllable Video Diffusion Model: Mingtao Guo,

Guanyu Xing,

Yanli Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR, author = {Guo, Mingtao and Xing, Guanyu and Liu, Yanli}, title = {High-Fidelity Relightable Monocular Portrait Animation with Lighting-Controllable Video Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {228-238} }
Plug-and-Play PPO: An Adaptive Point Prompt Optimizer Making SAM Greater: Xueyu Liu,

Rui Wang,

Yexin Lai,

Guangze Shi,

Feixue Shao,

Fang Hao,

Jianan Zhang,

Jia Shen,

Yongfei Wu,

Wen Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Xueyu and Wang, Rui and Lai, Yexin and Shi, Guangze and Shao, Feixue and Hao, Fang and Zhang, Jianan and Shen, Jia and Wu, Yongfei and Zheng, Wen}, title = {Plug-and-Play PPO: An Adaptive Point Prompt Optimizer Making SAM Greater}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4332-4342} }
EchoONE: Segmenting Multiple Echocardiography Planes in One Model: Jiongtong Hu,

Wufeng Xue,

Jun Cheng,

Yingying Liu,

Wei Zhuo,

Dong Ni; [pdf] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR, author = {Hu, Jiongtong and Xue, Wufeng and Cheng, Jun and Liu, Yingying and Zhuo, Wei and Ni, Dong}, title = {EchoONE: Segmenting Multiple Echocardiography Planes in One Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5207-5216} }
EasyHOI: Unleashing the Power of Large Models for Reconstructing Hand-Object Interactions in the Wild: Yumeng Liu,

Xiaoxiao Long,

Zemin Yang,

Yuan Liu,

Marc Habermann,

Christian Theobalt,

Yuexin Ma,

Wenping Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Yumeng and Long, Xiaoxiao and Yang, Zemin and Liu, Yuan and Habermann, Marc and Theobalt, Christian and Ma, Yuexin and Wang, Wenping}, title = {EasyHOI: Unleashing the Power of Large Models for Reconstructing Hand-Object Interactions in the Wild}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7037-7047} }
Patch Matters: Training-free Fine-grained Image Caption Enhancement via Local Perception: Ruotian Peng,

Haiying He,

Yake Wei,

Yandong Wen,

Di Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_CVPR, author = {Peng, Ruotian and He, Haiying and Wei, Yake and Wen, Yandong and Hu, Di}, title = {Patch Matters: Training-free Fine-grained Image Caption Enhancement via Local Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3963-3973} }
PosterO: Structuring Layout Trees to Enable Language Models in Generalized Content-Aware Layout Generation: HsiaoYuan Hsu,

Yuxin Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hsu_2025_CVPR, author = {Hsu, HsiaoYuan and Peng, Yuxin}, title = {PosterO: Structuring Layout Trees to Enable Language Models in Generalized Content-Aware Layout Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8117-8127} }
One2Any: One-Reference 6D Pose Estimation for Any Object: Mengya Liu,

Siyuan Li,

Ajad Chhatkuli,

Prune Truong,

Luc Van Gool,

Federico Tombari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Mengya and Li, Siyuan and Chhatkuli, Ajad and Truong, Prune and Van Gool, Luc and Tombari, Federico}, title = {One2Any: One-Reference 6D Pose Estimation for Any Object}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6457-6467} }
Contextual AD Narration with Interleaved Multimodal Sequence: Hanlin Wang,

Zhan Tong,

Kecheng Zheng,

Yujun Shen,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Hanlin and Tong, Zhan and Zheng, Kecheng and Shen, Yujun and Wang, Limin}, title = {Contextual AD Narration with Interleaved Multimodal Sequence}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8372-8383} }
MNE-SLAM: Multi-Agent Neural SLAM for Mobile Robots: Tianchen Deng,

Guole Shen,

Chen Xun,

Shenghai Yuan,

Tongxin Jin,

Hongming Shen,

Yanbo Wang,

Jingchuan Wang,

Hesheng Wang,

Danwei Wang,

Weidong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_CVPR, author = {Deng, Tianchen and Shen, Guole and Xun, Chen and Yuan, Shenghai and Jin, Tongxin and Shen, Hongming and Wang, Yanbo and Wang, Jingchuan and Wang, Hesheng and Wang, Danwei and Chen, Weidong}, title = {MNE-SLAM: Multi-Agent Neural SLAM for Mobile Robots}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1485-1494} }
TensoFlow: Tensorial Flow-based Sampler for Inverse Rendering: Chun Gu,

Xiaofei Wei,

Li Zhang,

Xiatian Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2025_CVPR,
  author    = {Gu, Chun and Wei, Xiaofei and Zhang, Li and Zhu, Xiatian},
  title     = {{TensoFlow}: Tensorial Flow-based Sampler for Inverse Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {495--504},
}
FRAMES-VQA: Benchmarking Fine-Tuning Robustness across Multi-Modal Shifts in Visual Question Answering: Chengyue Huang,

Brisa Maneechotesuwan,

Shivang Chopra,

Zsolt Kira; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Chengyue and Maneechotesuwan, Brisa and Chopra, Shivang and Kira, Zsolt},
  title     = {{FRAMES-VQA}: Benchmarking Fine-Tuning Robustness across Multi-Modal Shifts in Visual Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3909--3918},
}
LSceneLLM: Enhancing Large 3D Scene Understanding Using Adaptive Visual Preferences: Hongyan Zhi,

Peihao Chen,

Junyan Li,

Shuailei Ma,

Xinyu Sun,

Tianhang Xiang,

Yinjie Lei,

Mingkui Tan,

Chuang Gan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhi_2025_CVPR,
  author    = {Zhi, Hongyan and Chen, Peihao and Li, Junyan and Ma, Shuailei and Sun, Xinyu and Xiang, Tianhang and Lei, Yinjie and Tan, Mingkui and Gan, Chuang},
  title     = {{LSceneLLM}: Enhancing Large {3D} Scene Understanding Using Adaptive Visual Preferences},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3761--3771},
}
Exploring Temporally-Aware Features for Point Tracking: Inès Hyeonsu Kim,

Seokju Cho,

Jiahui Huang,

Jung Yi,

Joon-Young Lee,

Seungryong Kim; [pdf] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, In{\`e}s Hyeonsu and Cho, Seokju and Huang, Jiahui and Yi, Jung and Lee, Joon-Young and Kim, Seungryong},
  title     = {Exploring Temporally-Aware Features for Point Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1962--1972},
}
V^2Dial: Unification of Video and Visual Dialog via Multimodal Experts: Adnen Abdessaied,

Anna Rohrbach,

Marcus Rohrbach,

Andreas Bulling; [pdf] [supp]
[bibtex]
@InProceedings{Abdessaied_2025_CVPR,
  author    = {Abdessaied, Adnen and Rohrbach, Anna and Rohrbach, Marcus and Bulling, Andreas},
  title     = {{V$^2$Dial}: Unification of Video and Visual Dialog via Multimodal Experts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8637--8647},
}
Detail-Preserving Latent Diffusion for Stable Shadow Removal: Jiamin Xu,

Yuxin Zheng,

Zelong Li,

Chi Wang,

Renshu Gu,

Weiwei Xu,

Gang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Jiamin and Zheng, Yuxin and Li, Zelong and Wang, Chi and Gu, Renshu and Xu, Weiwei and Xu, Gang},
  title     = {Detail-Preserving Latent Diffusion for Stable Shadow Removal},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7592--7602},
}
CrossOver: 3D Scene Cross-Modal Alignment: Sayan Deb Sarkar,

Ondrej Miksik,

Marc Pollefeys,

Daniel Barath,

Iro Armeni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sarkar_2025_CVPR,
  author    = {Sarkar, Sayan Deb and Miksik, Ondrej and Pollefeys, Marc and Barath, Daniel and Armeni, Iro},
  title     = {{CrossOver}: {3D} Scene Cross-Modal Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8985--8994},
}
Rethinking Temporal Fusion with a Unified Gradient Descent View for 3D Semantic Occupancy Prediction: Dubing Chen,

Huan Zheng,

Jin Fang,

Xingping Dong,

Xianfei Li,

Wenlong Liao,

Tao He,

Pai Peng,

Jianbing Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Dubing and Zheng, Huan and Fang, Jin and Dong, Xingping and Li, Xianfei and Liao, Wenlong and He, Tao and Peng, Pai and Shen, Jianbing},
  title     = {Rethinking Temporal Fusion with a Unified Gradient Descent View for {3D} Semantic Occupancy Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1505--1515},
}
Scalable Video-to-Dataset Generation for Cross-Platform Mobile Agents: Yunseok Jang,

Yeda Song,

Sungryull Sohn,

Lajanugen Logeswaran,

Tiange Luo,

Dong-Ki Kim,

Kyunghoon Bae,

Honglak Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2025_CVPR,
  author    = {Jang, Yunseok and Song, Yeda and Sohn, Sungryull and Logeswaran, Lajanugen and Luo, Tiange and Kim, Dong-Ki and Bae, Kyunghoon and Lee, Honglak},
  title     = {Scalable Video-to-Dataset Generation for Cross-Platform Mobile Agents},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8604--8614},
}
Tokenize Image Patches: Global Context Fusion for Effective Haze Removal in Large Images: Jiuchen Chen,

Xinyu Yan,

Qizhi Xu,

Kaiqi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jiuchen and Yan, Xinyu and Xu, Qizhi and Li, Kaiqi},
  title     = {Tokenize Image Patches: Global Context Fusion for Effective Haze Removal in Large Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2258--2268},
}
ANNEXE: Unified Analyzing, Answering, and Pixel Grounding for Egocentric Interaction: Yuejiao Su,

Yi Wang,

Qiongyang Hu,

Chuang Yang,

Lap-Pui Chau; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2025_CVPR,
  author    = {Su, Yuejiao and Wang, Yi and Hu, Qiongyang and Yang, Chuang and Chau, Lap-Pui},
  title     = {{ANNEXE}: Unified Analyzing, Answering, and Pixel Grounding for Egocentric Interaction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9027--9038},
}
MET3R: Measuring Multi-View Consistency in Generated Images: Mohammad Asim,

Christopher Wewer,

Thomas Wimmer,

Bernt Schiele,

Jan Eric Lenssen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Asim_2025_CVPR,
  author    = {Asim, Mohammad and Wewer, Christopher and Wimmer, Thomas and Schiele, Bernt and Lenssen, Jan Eric},
  title     = {{MET3R}: Measuring Multi-View Consistency in Generated Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6034--6044},
}
Segmenting Maxillofacial Structures in CBCT Volumes: Federico Bolelli,

Kevin Marchesini,

Niels van Nistelrooij,

Luca Lumetti,

Vittorio Pipoli,

Elisa Ficarra,

Shankeeth Vinayahalingam,

Costantino Grana; [pdf]
[bibtex]
@InProceedings{Bolelli_2025_CVPR,
  author    = {Bolelli, Federico and Marchesini, Kevin and van Nistelrooij, Niels and Lumetti, Luca and Pipoli, Vittorio and Ficarra, Elisa and Vinayahalingam, Shankeeth and Grana, Costantino},
  title     = {Segmenting Maxillofacial Structures in {CBCT} Volumes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5238--5248},
}
3D Dental Model Segmentation with Geometrical Boundary Preserving: Shufan Xi,

Zexian Liu,

Junlin Chang,

Hongyu Wu,

Xiaogang Wang,

Aimin Hao; [pdf] [arXiv]
[bibtex]
@InProceedings{Xi_2025_CVPR,
  author    = {Xi, Shufan and Liu, Zexian and Chang, Junlin and Wu, Hongyu and Wang, Xiaogang and Hao, Aimin},
  title     = {{3D} Dental Model Segmentation with Geometrical Boundary Preserving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10476--10485},
}
VideoGigaGAN: Towards Detail-rich Video Super-Resolution: Yiran Xu,

Taesung Park,

Richard Zhang,

Yang Zhou,

Eli Shechtman,

Feng Liu,

Jia-Bin Huang,

Difan Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Yiran and Park, Taesung and Zhang, Richard and Zhou, Yang and Shechtman, Eli and Liu, Feng and Huang, Jia-Bin and Liu, Difan},
  title     = {{VideoGigaGAN}: Towards Detail-rich Video Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2139--2149},
}
GLUS: Global-Local Reasoning Unified into A Single Large Language Model for Video Segmentation: Lang Lin,

Xueyang Yu,

Ziqi Pang,

Yu-Xiong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Lang and Yu, Xueyang and Pang, Ziqi and Wang, Yu-Xiong},
  title     = {{GLUS}: Global-Local Reasoning Unified into A Single Large Language Model for Video Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8658--8667},
}
Towards RAW Object Detection in Diverse Conditions: Zhong-Yu Li,

Xin Jin,

Bo-Yuan Sun,

Chun-Le Guo,

Ming-Ming Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zhong-Yu and Jin, Xin and Sun, Bo-Yuan and Guo, Chun-Le and Cheng, Ming-Ming},
  title     = {Towards {RAW} Object Detection in Diverse Conditions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8859--8868},
}
FLAME: Frozen Large Language Models Enable Data-Efficient Language-Image Pre-training: Anjia Cao,

Xing Wei,

Zhiheng Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Anjia and Wei, Xing and Ma, Zhiheng},
  title     = {{FLAME}: Frozen Large Language Models Enable Data-Efficient Language-Image Pre-training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4080--4090},
}
Adapter Merging with Centroid Prototype Mapping for Scalable Class-Incremental Learning: Takuma Fukuda,

Hiroshi Kera,

Kazuhiko Kawamoto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fukuda_2025_CVPR,
  author    = {Fukuda, Takuma and Kera, Hiroshi and Kawamoto, Kazuhiko},
  title     = {Adapter Merging with Centroid Prototype Mapping for Scalable Class-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4884--4893},
}
OpenSDI: Spotting Diffusion-Generated Images in the Open World: Yabin Wang,

Zhiwu Huang,

Xiaopeng Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yabin and Huang, Zhiwu and Hong, Xiaopeng},
  title     = {{OpenSDI}: Spotting Diffusion-Generated Images in the Open World},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4291--4301},
}
Lux Post Facto: Learning Portrait Performance Relighting with Conditional Video Diffusion and a Hybrid Dataset: Yiqun Mei,

Mingming He,

Li Ma,

Julien Philip,

Wenqi Xian,

David M George,

Xueming Yu,

Gabriel Dedic,

Ahmet Levent Taşel,

Ning Yu,

Vishal M. Patel,

Paul Debevec; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mei_2025_CVPR,
  author    = {Mei, Yiqun and He, Mingming and Ma, Li and Philip, Julien and Xian, Wenqi and George, David M and Yu, Xueming and Dedic, Gabriel and Ta{\c{s}}el, Ahmet Levent and Yu, Ning and Patel, Vishal M. and Debevec, Paul},
  title     = {{Lux Post Facto}: Learning Portrait Performance Relighting with Conditional Video Diffusion and a Hybrid Dataset},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5510--5522},
}
DiG: Scalable and Efficient Diffusion Models with Gated Linear Attention: Lianghui Zhu,

Zilong Huang,

Bencheng Liao,

Jun Hao Liew,

Hanshu Yan,

Jiashi Feng,

Xinggang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Lianghui and Huang, Zilong and Liao, Bencheng and Liew, Jun Hao and Yan, Hanshu and Feng, Jiashi and Wang, Xinggang},
  title     = {{DiG}: Scalable and Efficient Diffusion Models with Gated Linear Attention},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7664--7674},
}
Monocular and Generalizable Gaussian Talking Head Animation: Shengjie Gong,

Haojie Li,

Jiapeng Tang,

Dongming Hu,

Shuangping Huang,

Hao Chen,

Tianshui Chen,

Zhuoman Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2025_CVPR,
  author    = {Gong, Shengjie and Li, Haojie and Tang, Jiapeng and Hu, Dongming and Huang, Shuangping and Chen, Hao and Chen, Tianshui and Liu, Zhuoman},
  title     = {Monocular and Generalizable {Gaussian} Talking Head Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5523--5534},
}
Locally Orderless Images for Optimization in Differentiable Rendering: Ishit Mehta,

Manmohan Chandraker,

Ravi Ramamoorthi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mehta_2025_CVPR,
  author    = {Mehta, Ishit and Chandraker, Manmohan and Ramamoorthi, Ravi},
  title     = {Locally Orderless Images for Optimization in Differentiable Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5763--5772},
}
Plug-and-Play Interpretable Responsible Text-to-Image Generation via Dual-Space Multi-facet Concept Control: Basim Azam,

Naveed Akhtar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Azam_2025_CVPR,
  author    = {Azam, Basim and Akhtar, Naveed},
  title     = {Plug-and-Play Interpretable Responsible Text-to-Image Generation via Dual-Space Multi-facet Concept Control},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2976--2985},
}
Fine-Grained Erasure in Text-to-Image Diffusion-based Foundation Models: Kartik Thakral,

Tamar Glaser,

Tal Hassner,

Mayank Vatsa,

Richa Singh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thakral_2025_CVPR,
  author    = {Thakral, Kartik and Glaser, Tamar and Hassner, Tal and Vatsa, Mayank and Singh, Richa},
  title     = {Fine-Grained Erasure in Text-to-Image Diffusion-based Foundation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9121--9130},
}
DeRS: Towards Extremely Efficient Upcycled Mixture-of-Experts Models: Yongqi Huang,

Peng Ye,

Chenyu Huang,

Jianjian Cao,

Lin Zhang,

Baopu Li,

Gang Yu,

Tao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Yongqi and Ye, Peng and Huang, Chenyu and Cao, Jianjian and Zhang, Lin and Li, Baopu and Yu, Gang and Chen, Tao},
  title     = {{DeRS}: Towards Extremely Efficient Upcycled Mixture-of-Experts Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10056--10066},
}
ALIEN: Implicit Neural Representations for Human Motion Prediction under Arbitrary Latency: Dong Wei,

Xiaoning Sun,

Xizhan Gao,

Shengxiang Hu,

Huaijiang Sun; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Dong and Sun, Xiaoning and Gao, Xizhan and Hu, Shengxiang and Sun, Huaijiang},
  title     = {{ALIEN}: Implicit Neural Representations for Human Motion Prediction under Arbitrary Latency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1861--1870},
}
Sufficient Invariant Learning for Distribution Shift: Taero Kim,

Subeen Park,

Sungjun Lim,

Yonghan Jung,

Krikamol Muandet,

Kyungwoo Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Taero and Park, Subeen and Lim, Sungjun and Jung, Yonghan and Muandet, Krikamol and Song, Kyungwoo},
  title     = {Sufficient Invariant Learning for Distribution Shift},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4958--4967},
}
Domain Generalization in CLIP via Learning with Diverse Text Prompts: Changsong Wen,

Zelin Peng,

Yu Huang,

Xiaokang Yang,

Wei Shen; [pdf]
[bibtex]
@InProceedings{Wen_2025_CVPR,
  author    = {Wen, Changsong and Peng, Zelin and Huang, Yu and Yang, Xiaokang and Shen, Wei},
  title     = {Domain Generalization in {CLIP} via Learning with Diverse Text Prompts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9559--9569},
}
IterIS: Iterative Inference-Solving Alignment for LoRA Merging: Hongxu Chen,

Zhen Wang,

Runshi Li,

Bowei Zhu,

Long Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Hongxu and Wang, Zhen and Li, Runshi and Zhu, Bowei and Chen, Long},
  title     = {{IterIS}: Iterative Inference-Solving Alignment for {LoRA} Merging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4829--4838},
}
Efficient ANN-Guided Distillation: Aligning Rate-based Features of Spiking Neural Networks through Hybrid Block-wise Replacement: Shu Yang,

Chengting Yu,

Lei Liu,

Hanzhi Ma,

Aili Wang,

Erping Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Shu and Yu, Chengting and Liu, Lei and Ma, Hanzhi and Wang, Aili and Li, Erping},
  title     = {Efficient {ANN-Guided} Distillation: Aligning Rate-based Features of Spiking Neural Networks through Hybrid Block-wise Replacement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10025--10035},
}
PrEditor3D: Fast and Precise 3D Shape Editing: Ziya Erkoç,

Can Gümeli,

Chaoyang Wang,

Matthias Nießner,

Angela Dai,

Peter Wonka,

Hsin-Ying Lee,

Peiye Zhuang; [pdf] [supp]
[bibtex]
@InProceedings{Erkoc_2025_CVPR,
  author    = {Erko{\c{c}}, Ziya and G{\"u}meli, Can and Wang, Chaoyang and Nie{\ss}ner, Matthias and Dai, Angela and Wonka, Peter and Lee, Hsin-Ying and Zhuang, Peiye},
  title     = {{PrEditor3D}: Fast and Precise {3D} Shape Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {640--649},
}
ComRoPE: Scalable and Robust Rotary Position Embedding Parameterized by Trainable Commuting Angle Matrices: Hao Yu,

Tangyu Jiang,

Shuning Jia,

Shannan Yan,

Shunning Liu,

Haolong Qian,

Guanghao Li,

Shuting Dong,

Chun Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Hao and Jiang, Tangyu and Jia, Shuning and Yan, Shannan and Liu, Shunning and Qian, Haolong and Li, Guanghao and Dong, Shuting and Yuan, Chun},
  title     = {{ComRoPE}: Scalable and Robust Rotary Position Embedding Parameterized by Trainable Commuting Angle Matrices},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4508--4517},
}
LOCORE: Image Re-ranking with Long-Context Sequence Modeling: Zilin Xiao,

Pavel Suma,

Ayush Sachdeva,

Hao-Jen Wang,

Giorgos Kordopatis-Zilos,

Giorgos Tolias,

Vicente Ordonez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Zilin and Suma, Pavel and Sachdeva, Ayush and Wang, Hao-Jen and Kordopatis-Zilos, Giorgos and Tolias, Giorgos and Ordonez, Vicente},
  title     = {{LOCORE}: Image Re-ranking with Long-Context Sequence Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9580--9590},
}
LiVOS: Light Video Object Segmentation with Gated Linear Matching: Qin Liu,

Jianfeng Wang,

Zhengyuan Yang,

Linjie Li,

Kevin Lin,

Marc Niethammer,

Lijuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Qin and Wang, Jianfeng and Yang, Zhengyuan and Li, Linjie and Lin, Kevin and Niethammer, Marc and Wang, Lijuan},
  title     = {{LiVOS}: Light Video Object Segmentation with Gated Linear Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8668--8678},
}
Polarized Color Screen Matting: Kenji Enomoto,

Scott Cohen,

Brian Price,

TJ Rhodes; [pdf] [supp]
[bibtex]
@InProceedings{Enomoto_2025_CVPR,
  author    = {Enomoto, Kenji and Cohen, Scott and Price, Brian and Rhodes, TJ},
  title     = {Polarized Color Screen Matting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {391--399},
}
GOAL: Global-local Object Alignment Learning: Hyungyu Choi,

Young Kyun Jang,

Chanho Eom; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2025_CVPR,
  author    = {Choi, Hyungyu and Jang, Young Kyun and Eom, Chanho},
  title     = {{GOAL}: Global-local Object Alignment Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4070--4079},
}
Post-pre-training for Modality Alignment in Vision-Language Foundation Models: Shin'ya Yamaguchi,

Dewei Feng,

Sekitoshi Kanai,

Kazuki Adachi,

Daiki Chijiwa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yamaguchi_2025_CVPR,
  author    = {Yamaguchi, Shin'ya and Feng, Dewei and Kanai, Sekitoshi and Adachi, Kazuki and Chijiwa, Daiki},
  title     = {Post-pre-training for Modality Alignment in Vision-Language Foundation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4256--4266},
}
SynthLight: Portrait Relighting with Diffusion Model by Learning to Re-render Synthetic Faces: Sumit Chaturvedi,

Mengwei Ren,

Yannick Hold-Geoffroy,

Jingyuan Liu,

Julie Dorsey,

Zhixin Shu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chaturvedi_2025_CVPR,
  author    = {Chaturvedi, Sumit and Ren, Mengwei and Hold-Geoffroy, Yannick and Liu, Jingyuan and Dorsey, Julie and Shu, Zhixin},
  title     = {{SynthLight}: Portrait Relighting with Diffusion Model by Learning to Re-render Synthetic Faces},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {369--379},
}
Pseudo Visible Feature Fine-Grained Fusion for Thermal Object Detection: Ting Li,

Mao Ye,

Tianwen Wu,

Nianxin Li,

Shuaifeng Li,

Song Tang,

Luping Ji; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Ting and Ye, Mao and Wu, Tianwen and Li, Nianxin and Li, Shuaifeng and Tang, Song and Ji, Luping},
  title     = {Pseudo Visible Feature Fine-Grained Fusion for Thermal Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6710--6719},
}
NVILA: Efficient Frontier Visual Language Models: Zhijian Liu,

Ligeng Zhu,

Baifeng Shi,

Zhuoyang Zhang,

Yuming Lou,

Shang Yang,

Haocheng Xi,

Shiyi Cao,

Yuxian Gu,

Dacheng Li,

Xiuyu Li,

Haotian Tang,

Yunhao Fang,

Yukang Chen,

Cheng-Yu Hsieh,

De-An Huang,

An-Chieh Cheng,

Jinyi Hu,

Sifei Liu,

Ranjay Krishna,

Pavlo Molchanov,

Jan Kautz,

Hongxu Yin,

Song Han,

Yao Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhijian and Zhu, Ligeng and Shi, Baifeng and Zhang, Zhuoyang and Lou, Yuming and Yang, Shang and Xi, Haocheng and Cao, Shiyi and Gu, Yuxian and Li, Dacheng and Li, Xiuyu and Tang, Haotian and Fang, Yunhao and Chen, Yukang and Hsieh, Cheng-Yu and Huang, De-An and Cheng, An-Chieh and Hu, Jinyi and Liu, Sifei and Krishna, Ranjay and Molchanov, Pavlo and Kautz, Jan and Yin, Hongxu and Han, Song and Lu, Yao},
  title     = {{NVILA}: Efficient Frontier Visual Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4122--4134},
}
SemiETS: Integrating Spatial and Content Consistencies for Semi-Supervised End-to-end Text Spotting: Dongliang Luo,

Hanshen Zhu,

Ziyang Zhang,

Dingkang Liang,

Xudong Xie,

Yuliang Liu,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Dongliang and Zhu, Hanshen and Zhang, Ziyang and Liang, Dingkang and Xie, Xudong and Liu, Yuliang and Bai, Xiang},
  title     = {{SemiETS}: Integrating Spatial and Content Consistencies for Semi-Supervised End-to-end Text Spotting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9329--9338},
}
NoPain: No-box Point Cloud Attack via Optimal Transport Singular Boundary: Zezeng Li,

Xiaoyu Du,

Na Lei,

Liming Chen,

Weimin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zezeng and Du, Xiaoyu and Lei, Na and Chen, Liming and Wang, Weimin},
  title     = {{NoPain}: No-box Point Cloud Attack via Optimal Transport Singular Boundary},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3492--3502},
}
FADA: Fast Diffusion Avatar Synthesis with Mixed-Supervised Multi-CFG Distillation: Tianyun Zhong,

Chao Liang,

Jianwen Jiang,

Gaojie Lin,

Jiaqi Yang,

Zhou Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_CVPR,
  author    = {Zhong, Tianyun and Liang, Chao and Jiang, Jianwen and Lin, Gaojie and Yang, Jiaqi and Zhao, Zhou},
  title     = {{FADA}: Fast Diffusion Avatar Synthesis with Mixed-Supervised {Multi-CFG} Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3101--3110},
}
Geometry Field Splatting with Gaussian Surfels: Kaiwen Jiang,

Venkataram Sivaram,

Cheng Peng,

Ravi Ramamoorthi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Kaiwen and Sivaram, Venkataram and Peng, Cheng and Ramamoorthi, Ravi},
  title     = {Geometry Field Splatting with {Gaussian} Surfels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5752--5762},
}
PS-EIP: Robust Photometric Stereo Based on Event Interval Profile: Kazuma Kitazawa,

Takahito Aoto,

Satoshi Ikehata,

Tsuyoshi Takatani; [pdf] [supp]
[bibtex]
@InProceedings{Kitazawa_2025_CVPR,
  author    = {Kitazawa, Kazuma and Aoto, Takahito and Ikehata, Satoshi and Takatani, Tsuyoshi},
  title     = {{PS-EIP}: Robust Photometric Stereo Based on Event Interval Profile},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6241--6251},
}
GenPC: Zero-shot Point Cloud Completion via 3D Generative Priors: An Li,

Zhe Zhu,

Mingqiang Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, An and Zhu, Zhe and Wei, Mingqiang},
  title     = {{GenPC}: Zero-shot Point Cloud Completion via {3D} Generative Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1308--1318},
}
Latent Drifting in Diffusion Models for Counterfactual Medical Image Synthesis: Yousef Yeganeh,

Azade Farshad,

Ioannis Charisiadis,

Marta Hasny,

Martin Hartenberger,

Björn Ommer,

Nassir Navab,

Ehsan Adeli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yeganeh_2025_CVPR,
  author    = {Yeganeh, Yousef and Farshad, Azade and Charisiadis, Ioannis and Hasny, Marta and Hartenberger, Martin and Ommer, Bj{\"o}rn and Navab, Nassir and Adeli, Ehsan},
  title     = {Latent Drifting in Diffusion Models for Counterfactual Medical Image Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7685--7695},
}
Rethinking Spiking Self-Attention Mechanism: Implementing a-XNOR Similarity Calculation in Spiking Transformers: Yichen Xiao,

Shuai Wang,

Dehao Zhang,

Wenjie Wei,

Yimeng Shan,

Xiaoli Liu,

Yulin Jiang,

Malu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Yichen and Wang, Shuai and Zhang, Dehao and Wei, Wenjie and Shan, Yimeng and Liu, Xiaoli and Jiang, Yulin and Zhang, Malu},
  title     = {Rethinking Spiking Self-Attention Mechanism: Implementing {a-XNOR} Similarity Calculation in Spiking Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5444--5454},
}
HeMoRa: Unsupervised Heuristic Consensus Sampling for Robust Point Cloud Registration: Shaocheng Yan,

Yiming Wang,

Kaiyan Zhao,

Pengcheng Shi,

Zhenjun Zhao,

Yongjun Zhang,

Jiayuan Li; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Shaocheng and Wang, Yiming and Zhao, Kaiyan and Shi, Pengcheng and Zhao, Zhenjun and Zhang, Yongjun and Li, Jiayuan},
  title     = {{HeMoRa}: Unsupervised Heuristic Consensus Sampling for Robust Point Cloud Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1363--1373},
}
Reducing Class-wise Confusion for Incremental Learning with Disentangled Manifolds: Huitong Chen,

Yu Wang,

Yan Fan,

Guosong Jiang,

Qinghua Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Huitong and Wang, Yu and Fan, Yan and Jiang, Guosong and Hu, Qinghua},
  title     = {Reducing Class-wise Confusion for Incremental Learning with Disentangled Manifolds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10121--10130},
}
AniMo: Species-Aware Model for Text-Driven Animal Motion Generation: Xuan Wang,

Kai Ruan,

Xing Zhang,

Gaoang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xuan and Ruan, Kai and Zhang, Xing and Wang, Gaoang},
  title     = {{AniMo}: Species-Aware Model for Text-Driven Animal Motion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1929--1939},
}
EditAR: Unified Conditional Generation with Autoregressive Models: Jiteng Mu,

Nuno Vasconcelos,

Xiaolong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mu_2025_CVPR,
  author    = {Mu, Jiteng and Vasconcelos, Nuno and Wang, Xiaolong},
  title     = {{EditAR}: Unified Conditional Generation with Autoregressive Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7899--7909},
}
Instance-wise Supervision-level Optimization in Active Learning: Shinnosuke Matsuo,

Riku Togashi,

Ryoma Bise,

Seiichi Uchida,

Masahiro Nomura; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Matsuo_2025_CVPR,
  author    = {Matsuo, Shinnosuke and Togashi, Riku and Bise, Ryoma and Uchida, Seiichi and Nomura, Masahiro},
  title     = {Instance-wise Supervision-level Optimization in Active Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4939--4947},
}
BHViT: Binarized Hybrid Vision Transformer: Tian Gao,

Yu Zhang,

Zhiyuan Zhang,

Huajun Liu,

Kaijie Yin,

Chengzhong Xu,

Hui Kong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Tian and Zhang, Yu and Zhang, Zhiyuan and Liu, Huajun and Yin, Kaijie and Xu, Chengzhong and Kong, Hui},
  title     = {{BHViT}: Binarized Hybrid Vision Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3563--3572},
}
Pathways on the Image Manifold: Image Editing via Video Generation: Noam Rotstein,

Gal Yona,

Daniel Silver,

Roy Velich,

David Bensaid,

Ron Kimmel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rotstein_2025_CVPR,
  author    = {Rotstein, Noam and Yona, Gal and Silver, Daniel and Velich, Roy and Bensaid, David and Kimmel, Ron},
  title     = {Pathways on the Image Manifold: Image Editing via Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7857--7866}
}
DeSplat: Decomposed Gaussian Splatting for Distractor-Free Rendering: Yihao Wang,

Marcus Klasson,

Matias Turkulainen,

Shuzhe Wang,

Juho Kannala,

Arno Solin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yihao and Klasson, Marcus and Turkulainen, Matias and Wang, Shuzhe and Kannala, Juho and Solin, Arno},
  title     = {{DeSplat}: Decomposed {Gaussian} Splatting for Distractor-Free Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {722--732}
}
Stable Flow: Vital Layers for Training-Free Image Editing: Omri Avrahami,

Or Patashnik,

Ohad Fried,

Egor Nemchinov,

Kfir Aberman,

Dani Lischinski,

Daniel Cohen-Or; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Avrahami_2025_CVPR,
  author    = {Avrahami, Omri and Patashnik, Or and Fried, Ohad and Nemchinov, Egor and Aberman, Kfir and Lischinski, Dani and Cohen-Or, Daniel},
  title     = {Stable Flow: Vital Layers for Training-Free Image Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7877--7888}
}
TokenMotion: Decoupled Motion Control via Token Disentanglement for Human-centric Video Generation: Ruineng Li,

Daitao Xing,

Huiming Sun,

Yuanzhou Ha,

Jinglin Shen,

Chiuman Ho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Ruineng and Xing, Daitao and Sun, Huiming and Ha, Yuanzhou and Shen, Jinglin and Ho, Chiuman},
  title     = {{TokenMotion}: Decoupled Motion Control via Token Disentanglement for Human-centric Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1951--1961}
}
CholecTrack20: A Multi-Perspective Tracking Dataset for Surgical Tools: Chinedu Innocent Nwoye,

Kareem Elgohary,

Anvita Srinivas,

Fauzan Zaid,

Joël L. Lavanchy,

Nicolas Padoy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nwoye_2025_CVPR,
  author    = {Nwoye, Chinedu Innocent and Elgohary, Kareem and Srinivas, Anvita and Zaid, Fauzan and Lavanchy, Jo{\"e}l L. and Padoy, Nicolas},
  title     = {{CholecTrack20}: A Multi-Perspective Tracking Dataset for Surgical Tools},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8942--8952}
}
Conditional Balance: Improving Multi-Conditioning Trade-Offs in Image Generation: Nadav Z. Cohen,

Oron Nir,

Ariel Shamir; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cohen_2025_CVPR,
  author    = {Cohen, Nadav Z. and Nir, Oron and Shamir, Ariel},
  title     = {Conditional Balance: Improving Multi-Conditioning Trade-Offs in Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2641--2650}
}
KeyFace: Expressive Audio-Driven Facial Animation for Long Sequences via KeyFrame Interpolation: Antoni Bigata,

Michał Stypułkowski,

Rodrigo Mira,

Stella Bounareli,

Konstantinos Vougioukas,

Zoe Landgraf,

Nikita Drobyshev,

Maciej Zieba,

Stavros Petridis,

Maja Pantic; [pdf] [supp]
[bibtex]
@InProceedings{Bigata_2025_CVPR,
  author    = {Bigata, Antoni and Stypu{\l}kowski, Micha{\l} and Mira, Rodrigo and Bounareli, Stella and Vougioukas, Konstantinos and Landgraf, Zoe and Drobyshev, Nikita and Zieba, Maciej and Petridis, Stavros and Pantic, Maja},
  title     = {{KeyFace}: Expressive Audio-Driven Facial Animation for Long Sequences via {KeyFrame} Interpolation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5477--5488}
}
Context-Enhanced Memory-Refined Transformer for Online Action Detection: Zhanzhong Pang,

Fadime Sener,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pang_2025_CVPR,
  author    = {Pang, Zhanzhong and Sener, Fadime and Yao, Angela},
  title     = {Context-Enhanced Memory-Refined Transformer for Online Action Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8700--8710}
}
Data-Free Group-Wise Fully Quantized Winograd Convolution via Learnable Scales: Shuokai Pan,

Gerti Tuzi,

Sudarshan Sreeram,

Dibakar Gope; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2025_CVPR,
  author    = {Pan, Shuokai and Tuzi, Gerti and Sreeram, Sudarshan and Gope, Dibakar},
  title     = {Data-Free Group-Wise Fully Quantized {Winograd} Convolution via Learnable Scales},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4091--4100}
}
GEN3C: 3D-Informed World-Consistent Video Generation with Precise Camera Control: Xuanchi Ren,

Tianchang Shen,

Jiahui Huang,

Huan Ling,

Yifan Lu,

Merlin Nimier-David,

Thomas Müller,

Alexander Keller,

Sanja Fidler,

Jun Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Xuanchi and Shen, Tianchang and Huang, Jiahui and Ling, Huan and Lu, Yifan and Nimier-David, Merlin and M{\"u}ller, Thomas and Keller, Alexander and Fidler, Sanja and Gao, Jun},
  title     = {{GEN3C}: {3D}-Informed World-Consistent Video Generation with Precise Camera Control},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6121--6132}
}
A Dataset for Semantic Segmentation in the Presence of Unknowns: Zakaria Laskar,

Tomas Vojir,

Matej Grcic,

Iaroslav Melekhov,

Shankar Gangisetty,

Juho Kannala,

Jiri Matas,

Giorgos Tolias,

C.V. Jawahar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Laskar_2025_CVPR,
  author    = {Laskar, Zakaria and Vojir, Tomas and Grcic, Matej and Melekhov, Iaroslav and Gangisetty, Shankar and Kannala, Juho and Matas, Jiri and Tolias, Giorgos and Jawahar, C. V.},
  title     = {A Dataset for Semantic Segmentation in the Presence of Unknowns},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1439--1448}
}
HierarQ: Task-Aware Hierarchical Q-Former for Enhanced Video Understanding: Shehreen Azad,

Vibhav Vineet,

Yogesh Singh Rawat; [pdf] [supp]
[bibtex]
@InProceedings{Azad_2025_CVPR,
  author    = {Azad, Shehreen and Vineet, Vibhav and Rawat, Yogesh Singh},
  title     = {{HierarQ}: Task-Aware Hierarchical {Q-Former} for Enhanced Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8545--8556}
}
CASP: Compression of Large Multimodal Models Based on Attention Sparsity: Mohsen Gholami,

Mohammad Akbari,

Kevin Cannons,

Yong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gholami_2025_CVPR,
  author    = {Gholami, Mohsen and Akbari, Mohammad and Cannons, Kevin and Zhang, Yong},
  title     = {{CASP}: Compression of Large Multimodal Models Based on Attention Sparsity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9372--9381}
}
UNIC-Adapter: Unified Image-instruction Adapter with Multi-modal Transformer for Image Generation: Lunhao Duan,

Shanshan Zhao,

Wenjun Yan,

Yinglun Li,

Qing-Guo Chen,

Zhao Xu,

Weihua Luo,

Kaifu Zhang,

Mingming Gong,

Gui-Song Xia; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2025_CVPR,
  author    = {Duan, Lunhao and Zhao, Shanshan and Yan, Wenjun and Li, Yinglun and Chen, Qing-Guo and Xu, Zhao and Luo, Weihua and Zhang, Kaifu and Gong, Mingming and Xia, Gui-Song},
  title     = {{UNIC-Adapter}: Unified Image-instruction Adapter with Multi-modal Transformer for Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7963--7973}
}
Towards Cost-Effective Learning: A Synergy of Semi-Supervised and Active Learning: Tianxiang Yin,

Ningzhong Liu,

Han Sun; [pdf] [supp]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Tianxiang and Liu, Ningzhong and Sun, Han},
  title     = {Towards Cost-Effective Learning: A Synergy of Semi-Supervised and Active Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10163--10172}
}
Generative Photography: Scene-Consistent Camera Control for Realistic Text-to-Image Synthesis: Yu Yuan,

Xijun Wang,

Yichen Sheng,

Prateek Chennuri,

Xingguang Zhang,

Stanley Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Yu and Wang, Xijun and Sheng, Yichen and Chennuri, Prateek and Zhang, Xingguang and Chan, Stanley},
  title     = {Generative Photography: Scene-Consistent Camera Control for Realistic Text-to-Image Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7920--7930}
}
Advancing Manga Analysis: Comprehensive Segmentation Annotations for the Manga109 Dataset: Minshan Xie,

Jian Lin,

Hanyuan Liu,

Chengze Li,

Tien-Tsin Wong; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Minshan and Lin, Jian and Liu, Hanyuan and Li, Chengze and Wong, Tien-Tsin},
  title     = {Advancing Manga Analysis: Comprehensive Segmentation Annotations for the {Manga109} Dataset},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8869--8878}
}
EnvGS: Modeling View-Dependent Appearance with Environment Gaussian: Tao Xie,

Xi Chen,

Zhen Xu,

Yiman Xie,

Yudong Jin,

Yujun Shen,

Sida Peng,

Hujun Bao,

Xiaowei Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Tao and Chen, Xi and Xu, Zhen and Xie, Yiman and Jin, Yudong and Shen, Yujun and Peng, Sida and Bao, Hujun and Zhou, Xiaowei},
  title     = {{EnvGS}: Modeling View-Dependent Appearance with Environment {Gaussian}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5742--5751}
}
Provoking Multi-modal Few-Shot LVLM via Exploration-Exploitation In-Context Learning: Cheng Chen,

Yunpeng Zhai,

Yifan Zhao,

Jinyang Gao,

Bolin Ding,

Jia Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Cheng and Zhai, Yunpeng and Zhao, Yifan and Gao, Jinyang and Ding, Bolin and Li, Jia},
  title     = {Provoking Multi-modal Few-Shot {LVLM} via Exploration-Exploitation In-Context Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3826--3835}
}
MonoDGP: Monocular 3D Object Detection with Decoupled-Query and Geometry-Error Priors: Fanqi Pu,

Yifan Wang,

Jiru Deng,

Wenming Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pu_2025_CVPR,
  author    = {Pu, Fanqi and Wang, Yifan and Deng, Jiru and Yang, Wenming},
  title     = {{MonoDGP}: Monocular {3D} Object Detection with Decoupled-Query and Geometry-Error Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6520--6530}
}
Flexible Group Count Enables Hassle-Free Structured Pruning: Jiamu Zhang,

Shaochen Zhong,

Andrew Ye,

Zirui Liu,

Sebastian Zhao,

Kaixiong Zhou,

Li Li,

Soo-Hyun Choi,

Rui Chen,

Xia Hu,

Shuai Xu,

Vipin Chaudhary; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jiamu and Zhong, Shaochen and Ye, Andrew and Liu, Zirui and Zhao, Sebastian and Zhou, Kaixiong and Li, Li and Choi, Soo-Hyun and Chen, Rui and Hu, Xia and Xu, Shuai and Chaudhary, Vipin},
  title     = {Flexible Group Count Enables Hassle-Free Structured Pruning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4807--4818}
}
EasyCraft: A Robust and Efficient Framework for Automatic Avatar Crafting: Suzhen Wang,

Weijie Chen,

Wei Zhang,

Minda Zhao,

Lincheng Li,

Rongsheng Zhang,

Zhipeng Hu,

Xin Yu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Suzhen and Chen, Weijie and Zhang, Wei and Zhao, Minda and Li, Lincheng and Zhang, Rongsheng and Hu, Zhipeng and Yu, Xin},
  title     = {{EasyCraft}: A Robust and Efficient Framework for Automatic Avatar Crafting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5581--5591}
}
MeshArt: Generating Articulated Meshes with Structure-Guided Transformers: Daoyi Gao,

Yawar Siddiqui,

Lei Li,

Angela Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Daoyi and Siddiqui, Yawar and Li, Lei and Dai, Angela},
  title     = {{MeshArt}: Generating Articulated Meshes with Structure-Guided Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {618--627}
}
Adaptive Non-Uniform Timestep Sampling for Accelerating Diffusion Model Training: Myunsoo Kim,

Donghyeon Ki,

Seong-Woong Shim,

Byung-Jun Lee; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Myunsoo and Ki, Donghyeon and Shim, Seong-Woong and Lee, Byung-Jun},
  title     = {Adaptive Non-Uniform Timestep Sampling for Accelerating Diffusion Model Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2513--2522}
}
Explainable Saliency: Articulating Reasoning with Contextual Prioritization: Nuo Chen,

Ming Jiang,

Qi Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Nuo and Jiang, Ming and Zhao, Qi},
  title     = {Explainable Saliency: Articulating Reasoning with Contextual Prioritization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9601--9610}
}
Compass Control: Multi Object Orientation Control for Text-to-Image Generation: Rishubh Parihar,

Vaibhav Agrawal,

Sachidanand VS,

Venkatesh Babu Radhakrishnan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parihar_2025_CVPR,
  author    = {Parihar, Rishubh and Agrawal, Vaibhav and VS, Sachidanand and Radhakrishnan, Venkatesh Babu},
  title     = {Compass Control: Multi Object Orientation Control for Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2791--2801}
}
Unleashing the Potential of Consistency Learning for Detecting and Grounding Multi-Modal Media Manipulation: Yiheng Li,

Yang Yang,

Zichang Tan,

Huan Liu,

Weihua Chen,

Xu Zhou,

Zhen Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yiheng and Yang, Yang and Tan, Zichang and Liu, Huan and Chen, Weihua and Zhou, Xu and Lei, Zhen},
  title     = {Unleashing the Potential of Consistency Learning for Detecting and Grounding Multi-Modal Media Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9242--9252}
}
VideoGEM: Training-free Action Grounding in Videos: Felix Vogel,

Walid Bousselham,

Anna Kukleva,

Nina Shvetsova,

Hilde Kuehne; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vogel_2025_CVPR,
  author    = {Vogel, Felix and Bousselham, Walid and Kukleva, Anna and Shvetsova, Nina and Kuehne, Hilde},
  title     = {{VideoGEM}: Training-free Action Grounding in Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3374--3383}
}
Structure-from-Motion with a Non-Parametric Camera Model: Yihan Wang,

Linfei Pan,

Marc Pollefeys,

Viktor Larsson; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yihan and Pan, Linfei and Pollefeys, Marc and Larsson, Viktor},
  title     = {Structure-from-Motion with a Non-Parametric Camera Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1040--1049}
}
LAL: Enhancing 3D Human Motion Prediction with Latency-aware Auxiliary Learning: Xiaoning Sun,

Dong Wei,

Huaijiang Sun,

Shengxiang Hu; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Xiaoning and Wei, Dong and Sun, Huaijiang and Hu, Shengxiang},
  title     = {{LAL}: Enhancing {3D} Human Motion Prediction with Latency-aware Auxiliary Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7105--7114}
}
RefPose: Leveraging Reference Geometric Correspondences for Accurate 6D Pose Estimation of Unseen Objects: Jaeguk Kim,

Jaewoo Park,

Keuntek Lee,

Nam Ik Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Jaeguk and Park, Jaewoo and Lee, Keuntek and Cho, Nam Ik},
  title     = {{RefPose}: Leveraging Reference Geometric Correspondences for Accurate {6D} Pose Estimation of Unseen Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6447--6456}
}
Relation3D : Enhancing Relation Modeling for Point Cloud Instance Segmentation: Jiahao Lu,

Jiacheng Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Jiahao and Deng, Jiacheng},
  title     = {{Relation3D} : Enhancing Relation Modeling for Point Cloud Instance Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8889--8899}
}
Shape My Moves: Text-Driven Shape-Aware Synthesis of Human Motions: Ting-Hsuan Liao,

Yi Zhou,

Yu Shen,

Chun-Hao Paul Huang,

Saayan Mitra,

Jia-Bin Huang,

Uttaran Bhattacharya; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_CVPR,
  author    = {Liao, Ting-Hsuan and Zhou, Yi and Shen, Yu and Huang, Chun-Hao Paul and Mitra, Saayan and Huang, Jia-Bin and Bhattacharya, Uttaran},
  title     = {Shape My Moves: Text-Driven Shape-Aware Synthesis of Human Motions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1917--1928}
}
Beyond Human Perception: Understanding Multi-Object World from Monocular View: Keyu Guo,

Yongle Huang,

Shijie Sun,

Xiangyu Song,

Mingtao Feng,

Zedong Liu,

Huansheng Song,

Tiantian Wang,

Jianxin Li,

Naveed Akhtar,

Ajmal Saeed Mian; [pdf]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Keyu and Huang, Yongle and Sun, Shijie and Song, Xiangyu and Feng, Mingtao and Liu, Zedong and Song, Huansheng and Wang, Tiantian and Li, Jianxin and Akhtar, Naveed and Mian, Ajmal Saeed},
  title     = {Beyond Human Perception: Understanding Multi-Object World from Monocular View},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3751--3760}
}
LIRM: Large Inverse Rendering Model for Progressive Reconstruction of Shape, Materials and View-dependent Radiance Fields: Zhengqin Li,

Dilin Wang,

Ka Chen,

Zhaoyang Lv,

Thu Nguyen-Phuoc,

Milim Lee,

Jia-Bin Huang,

Lei Xiao,

Yufeng Zhu,

Carl S. Marshall,

Yuheng Ren,

Richard Newcombe,

Zhao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zhengqin and Wang, Dilin and Chen, Ka and Lv, Zhaoyang and Nguyen-Phuoc, Thu and Lee, Milim and Huang, Jia-Bin and Xiao, Lei and Zhu, Yufeng and Marshall, Carl S. and Ren, Yuheng and Newcombe, Richard and Dong, Zhao},
  title     = {{LIRM}: Large Inverse Rendering Model for Progressive Reconstruction of Shape, Materials and View-dependent Radiance Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {505--517}
}
Latent Space Super-Resolution for Higher-Resolution Image Generation with Diffusion Models: Jinho Jeong,

Sangmin Han,

Jinwoo Kim,

Seon Joo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_CVPR,
  author    = {Jeong, Jinho and Han, Sangmin and Kim, Jinwoo and Kim, Seon Joo},
  title     = {Latent Space Super-Resolution for Higher-Resolution Image Generation with Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2355--2365}
}
Rethinking Noisy Video-Text Retrieval via Relation-aware Alignment: Huakai Lai,

Guoxin Xiong,

Huayu Mai,

Xiang Liu,

Tianzhu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Huakai and Xiong, Guoxin and Mai, Huayu and Liu, Xiang and Zhang, Tianzhu},
  title     = {Rethinking Noisy Video-Text Retrieval via Relation-aware Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9231--9241}
}
Scaling Vision Pre-Training to 4K Resolution: Baifeng Shi,

Boyi Li,

Han Cai,

Yao Lu,

Sifei Liu,

Marco Pavone,

Jan Kautz,

Song Han,

Trevor Darrell,

Pavlo Molchanov,

Hongxu Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Baifeng and Li, Boyi and Cai, Han and Lu, Yao and Liu, Sifei and Pavone, Marco and Kautz, Jan and Han, Song and Darrell, Trevor and Molchanov, Pavlo and Yin, Hongxu},
  title     = {Scaling Vision Pre-Training to {4K} Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9631--9640}
}
GarmentPile: Point-Level Visual Affordance Guided Retrieval and Adaptation for Cluttered Garments Manipulation: Ruihai Wu,

Ziyu Zhu,

Yuran Wang,

Yue Chen,

Jiarui Wang,

Hao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Ruihai and Zhu, Ziyu and Wang, Yuran and Chen, Yue and Wang, Jiarui and Dong, Hao},
  title     = {{GarmentPile}: Point-Level Visual Affordance Guided Retrieval and Adaptation for Cluttered Garments Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6950--6959}
}
Improving Editability in Image Generation with Layer-wise Memory: Daneul Kim,

Jaeah Lee,

Jaesik Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Daneul and Lee, Jaeah and Park, Jaesik},
  title     = {Improving Editability in Image Generation with Layer-wise Memory},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7889--7898}
}
Simplification Is All You Need against Out-of-Distribution Overconfidence: Keke Tang,

Chao Hou,

Weilong Peng,

Xiang Fang,

Zhize Wu,

Yongwei Nie,

Wenping Wang,

Zhihong Tian; [pdf]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Keke and Hou, Chao and Peng, Weilong and Fang, Xiang and Wu, Zhize and Nie, Yongwei and Wang, Wenping and Tian, Zhihong},
  title     = {Simplification Is All You Need against Out-of-Distribution Overconfidence},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5030--5040}
}
SpatialDreamer: Self-supervised Stereo Video Synthesis from Monocular Input: Zhen Lv,

Yangqi Long,

Congzhentao Huang,

Cao Li,

Chengfei Lv,

Hao Ren,

Dian Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2025_CVPR,
  author    = {Lv, Zhen and Long, Yangqi and Huang, Congzhentao and Li, Cao and Lv, Chengfei and Ren, Hao and Zheng, Dian},
  title     = {{SpatialDreamer}: Self-supervised Stereo Video Synthesis from Monocular Input},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {811--821}
}
LOD-GS: Achieving Levels of Detail using Scalable Gaussian Soup: Jianxiong Shen,

Yue Qian,

Xiaohang Zhan; [pdf]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Jianxiong and Qian, Yue and Zhan, Xiaohang},
  title     = {{LOD-GS}: Achieving Levels of Detail using Scalable {Gaussian} Soup},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {671--680}
}
The Devil is in Low-Level Features for Cross-Domain Few-Shot Segmentation: Yuhan Liu,

Yixiong Zou,

Yuhua Li,

Ruixuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yuhan and Zou, Yixiong and Li, Yuhua and Li, Ruixuan},
  title     = {The Devil is in Low-Level Features for Cross-Domain Few-Shot Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4618--4627}
}
Discrete to Continuous: Generating Smooth Transition Poses from Sign Language Observations: Shengeng Tang,

Jiayi He,

Lechao Cheng,

Jingjing Wu,

Dan Guo,

Richang Hong; [pdf] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Shengeng and He, Jiayi and Cheng, Lechao and Wu, Jingjing and Guo, Dan and Hong, Richang},
  title     = {Discrete to Continuous: Generating Smooth Transition Poses from Sign Language Observations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3481--3491}
}
Unified Medical Lesion Segmentation via Self-referring Indicator: Shijie Chang,

Xiaoqi Zhao,

Lihe Zhang,

Tiancheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2025_CVPR,
  author    = {Chang, Shijie and Zhao, Xiaoqi and Zhang, Lihe and Wang, Tiancheng},
  title     = {Unified Medical Lesion Segmentation via Self-referring Indicator},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10414--10424}
}
Unveiling Differences in Generative Models: A Scalable Differential Clustering Approach: Jingwei Zhang,

Mohammad Jalali,

Cheuk Ting Li,

Farzan Farnia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jingwei and Jalali, Mohammad and Li, Cheuk Ting and Farnia, Farzan},
  title     = {Unveiling Differences in Generative Models: A Scalable Differential Clustering Approach},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8269--8278}
}
Semantic Library Adaptation: LoRA Retrieval and Fusion for Open-Vocabulary Semantic Segmentation: Reza Qorbani,

Gianluca Villani,

Theodoros Panagiotakopoulos,

Marc Botet Colomer,

Linus Härenstam-Nielsen,

Mattia Segu,

Pier Luigi Dovesi,

Jussi Karlgren,

Daniel Cremers,

Federico Tombari,

Matteo Poggi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qorbani_2025_CVPR,
  author    = {Qorbani, Reza and Villani, Gianluca and Panagiotakopoulos, Theodoros and Colomer, Marc Botet and H{\"a}renstam-Nielsen, Linus and Segu, Mattia and Dovesi, Pier Luigi and Karlgren, Jussi and Cremers, Daniel and Tombari, Federico and Poggi, Matteo},
  title     = {Semantic Library Adaptation: {LoRA} Retrieval and Fusion for Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9804--9815}
}
PhyS-EdiT: Physics-aware Semantic Image Editing with Text Description: Ziqi Cai,

Shuchen Weng,

Yifei Xia,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Ziqi and Weng, Shuchen and Xia, Yifei and Shi, Boxin},
  title     = {{PhyS-EdiT}: Physics-aware Semantic Image Editing with Text Description},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7867--7876}
}
SceneDiffuser++: City-Scale Traffic Simulation via a Generative World Model: Shuhan Tan,

John Lambert,

Hong Jeon,

Sakshum Kulshrestha,

Yijing Bai,

Jing Luo,

Dragomir Anguelov,

Mingxing Tan,

Chiyu Max Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2025_CVPR,
  author    = {Tan, Shuhan and Lambert, John and Jeon, Hong and Kulshrestha, Sakshum and Bai, Yijing and Luo, Jing and Anguelov, Dragomir and Tan, Mingxing and Jiang, Chiyu Max},
  title     = {{SceneDiffuser++}: City-Scale Traffic Simulation via a Generative World Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1570--1580}
}
Gaussian Splatting Feature Fields for (Privacy-Preserving) Visual Localization: Maxime Pietrantoni,

Gabriela Csurka,

Torsten Sattler; [pdf] [supp]
[bibtex]
@InProceedings{Pietrantoni_2025_CVPR,
  author    = {Pietrantoni, Maxime and Csurka, Gabriela and Sattler, Torsten},
  title     = {Gaussian Splatting Feature Fields for (Privacy-Preserving) Visual Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1082--1092}
}
Gazing Into Missteps: Leveraging Eye-Gaze for Unsupervised Mistake Detection in Egocentric Videos of Skilled Human Activities: Michele Mazzamuto,

Antonino Furnari,

Yoichi Sato,

Giovanni Maria Farinella; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mazzamuto_2025_CVPR,
  author    = {Mazzamuto, Michele and Furnari, Antonino and Sato, Yoichi and Farinella, Giovanni Maria},
  title     = {Gazing Into Missteps: Leveraging Eye-Gaze for Unsupervised Mistake Detection in Egocentric Videos of Skilled Human Activities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8310--8320}
}
Recover and Match: Open-Vocabulary Multi-Label Recognition through Knowledge-Constrained Optimal Transport: Hao Tan,

Zichang Tan,

Jun Li,

Ajian Liu,

Jun Wan,

Zhen Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2025_CVPR,
  author    = {Tan, Hao and Tan, Zichang and Li, Jun and Liu, Ajian and Wan, Jun and Lei, Zhen},
  title     = {Recover and Match: Open-Vocabulary Multi-Label Recognition through Knowledge-Constrained Optimal Transport},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4650--4660}
}
DyFo: A Training-Free Dynamic Focus Visual Search for Enhancing LMMs in Fine-Grained Visual Understanding: Geng Li,

Jinglin Xu,

Yunzhen Zhao,

Yuxin Peng; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Geng and Xu, Jinglin and Zhao, Yunzhen and Peng, Yuxin},
  title     = {{DyFo}: A Training-Free Dynamic Focus Visual Search for Enhancing {LMMs} in Fine-Grained Visual Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9098--9108}
}
From Head to Tail: Towards Balanced Representation in Large Vision-Language Models through Adaptive Data Calibration: Mingyang Song,

Xiaoye Qu,

Jiawei Zhou,

Yu Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_CVPR,
  author    = {Song, Mingyang and Qu, Xiaoye and Zhou, Jiawei and Cheng, Yu},
  title     = {From Head to Tail: Towards Balanced Representation in Large Vision-Language Models through Adaptive Data Calibration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9434--9444}
}
HERA: Hybrid Explicit Representation for Ultra-Realistic Head Avatars: Hongrui Cai,

Yuting Xiao,

Xuan Wang,

Jiafei Li,

Yudong Guo,

Yanbo Fan,

Shenghua Gao,

Juyong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2025_CVPR, author = {Cai, Hongrui and Xiao, Yuting and Wang, Xuan and Li, Jiafei and Guo, Yudong and Fan, Yanbo and Gao, Shenghua and Zhang, Juyong}, title = {{HERA}: Hybrid Explicit Representation for Ultra-Realistic Head Avatars}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {260--270} }
CoE: Chain-of-Explanation via Automatic Visual Concept Circuit Description and Polysemanticity Quantification: Wenlong Yu,

Qilong Wang,

Chuang Liu,

Dong Li,

Qinghua Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR, author = {Yu, Wenlong and Wang, Qilong and Liu, Chuang and Li, Dong and Hu, Qinghua}, title = {{CoE}: Chain-of-Explanation via Automatic Visual Concept Circuit Description and Polysemanticity Quantification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4364--4374} }
Hierarchical Adaptive Filtering Network for Text Image Specular Highlight Removal: Zhi Jiang,

Jingbo Hu,

Ling Zhang,

Gang Fu,

Chunxia Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_CVPR, author = {Jiang, Zhi and Hu, Jingbo and Zhang, Ling and Fu, Gang and Xiao, Chunxia}, title = {Hierarchical Adaptive Filtering Network for Text Image Specular Highlight Removal}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2408--2417} }
Learning Extremely High Density Crowds as Active Matters: Feixiang He,

Jiangbei Yue,

Jialin Zhu,

Armin Seyfried,

Dan Casas,

Julien Pettré,

He Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_CVPR, author = {He, Feixiang and Yue, Jiangbei and Zhu, Jialin and Seyfried, Armin and Casas, Dan and Pettr{\'e}, Julien and Wang, He}, title = {Learning Extremely High Density Crowds as Active Matters}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {540--550} }
EchoMimicV2: Towards Striking, Simplified, and Semi-Body Human Animation: Rang Meng,

Xingyu Zhang,

Yuming Li,

Chenguang Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Meng_2025_CVPR, author = {Meng, Rang and Zhang, Xingyu and Li, Yuming and Ma, Chenguang}, title = {{EchoMimicV2}: Towards Striking, Simplified, and Semi-Body Human Animation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5489--5498} }
Gaussian Splatting for Efficient Satellite Image Photogrammetry: Luca Savant Aira,

Gabriele Facciolo,

Thibaud Ehret; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Aira_2025_CVPR, author = {Aira, Luca Savant and Facciolo, Gabriele and Ehret, Thibaud}, title = {{Gaussian} Splatting for Efficient Satellite Image Photogrammetry}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5959--5969} }
Towards Improved Text-Aligned Codebook Learning: Multi-Hierarchical Codebook-Text Alignment with Long Text: Guotao Liang,

Baoquan Zhang,

Zhiyuan Wen,

Junteng Zhao,

Yunming Ye,

Kola Ye,

Yao He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR, author = {Liang, Guotao and Zhang, Baoquan and Wen, Zhiyuan and Zhao, Junteng and Ye, Yunming and Ye, Kola and He, Yao}, title = {Towards Improved Text-Aligned Codebook Learning: Multi-Hierarchical Codebook-Text Alignment with Long Text}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4060--4069} }
Parallel Sequence Modeling via Generalized Spatial Propagation Network: Hongjun Wang,

Wonmin Byeon,

Jiarui Xu,

Jinwei Gu,

Ka Chun Cheung,

Xiaolong Wang,

Kai Han,

Jan Kautz,

Sifei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Hongjun and Byeon, Wonmin and Xu, Jiarui and Gu, Jinwei and Cheung, Ka Chun and Wang, Xiaolong and Han, Kai and Kautz, Jan and Liu, Sifei}, title = {Parallel Sequence Modeling via Generalized Spatial Propagation Network}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4473--4483} }
NADER: Neural Architecture Design via Multi-Agent Collaboration: Zekang Yang,

Wang Zeng,

Sheng Jin,

Chen Qian,

Ping Luo,

Wentao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR, author = {Yang, Zekang and Zeng, Wang and Jin, Sheng and Qian, Chen and Luo, Ping and Liu, Wentao}, title = {{NADER}: Neural Architecture Design via Multi-Agent Collaboration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4452--4461} }
Fortifying Federated Learning Towards Trustworthiness via Auditable Data Valuation and Verifiable Client Contribution: K Naveen Kumar,

Ranjeet Ranjan Jha,

C Krishna Mohan,

Ravindra Babu Tallamraju; [pdf] [supp]
[bibtex]
@InProceedings{Kumar_2025_CVPR, author = {Kumar, K Naveen and Jha, Ranjeet Ranjan and Mohan, C Krishna and Tallamraju, Ravindra Babu}, title = {Fortifying Federated Learning Towards Trustworthiness via Auditable Data Valuation and Verifiable Client Contribution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4999--5009} }
UniPre3D: Unified Pre-training of 3D Point Cloud Models with Cross-Modal Gaussian Splatting: Ziyi Wang,

Yanran Zhang,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Ziyi and Zhang, Yanran and Zhou, Jie and Lu, Jiwen}, title = {{UniPre3D}: Unified Pre-training of {3D} Point Cloud Models with Cross-Modal {Gaussian} Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1319--1329} }
Charm: The Missing Piece in ViT Fine-Tuning for Image Aesthetic Assessment: Fatemeh Behrad,

Tinne Tuytelaars,

Johan Wagemans; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Behrad_2025_CVPR, author = {Behrad, Fatemeh and Tuytelaars, Tinne and Wagemans, Johan}, title = {Charm: The Missing Piece in {ViT} Fine-Tuning for Image Aesthetic Assessment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7815--7824} }
SeeGround: See and Ground for Zero-Shot Open-Vocabulary 3D Visual Grounding: Rong Li,

Shijie Li,

Lingdong Kong,

Xulei Yang,

Junwei Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Rong and Li, Shijie and Kong, Lingdong and Yang, Xulei and Liang, Junwei}, title = {{SeeGround}: See and Ground for Zero-Shot Open-Vocabulary {3D} Visual Grounding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {3707--3717} }
Linear Attention Modeling for Learned Image Compression: Donghui Feng,

Zhengxue Cheng,

Shen Wang,

Ronghua Wu,

Hongwei Hu,

Guo Lu,

Li Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_CVPR, author = {Feng, Donghui and Cheng, Zhengxue and Wang, Shen and Wu, Ronghua and Hu, Hongwei and Lu, Guo and Song, Li}, title = {Linear Attention Modeling for Learned Image Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7623--7632} }
Asynchronous Collaborative Graph Representation for Frames and Events: Dianze Li,

Jianing Li,

Xu Liu,

Xiaopeng Fan,

Yonghong Tian; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Dianze and Li, Jianing and Liu, Xu and Fan, Xiaopeng and Tian, Yonghong}, title = {Asynchronous Collaborative Graph Representation for Frames and Events}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1655--1666} }
ReconDreamer: Crafting World Models for Driving Scene Reconstruction via Online Restoration: Chaojun Ni,

Guosheng Zhao,

Xiaofeng Wang,

Zheng Zhu,

Wenkang Qin,

Guan Huang,

Chen Liu,

Yuyin Chen,

Yida Wang,

Xueyang Zhang,

Yifei Zhan,

Kun Zhan,

Peng Jia,

Xianpeng Lang,

Xingang Wang,

Wenjun Mei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2025_CVPR, author = {Ni, Chaojun and Zhao, Guosheng and Wang, Xiaofeng and Zhu, Zheng and Qin, Wenkang and Huang, Guan and Liu, Chen and Chen, Yuyin and Wang, Yida and Zhang, Xueyang and Zhan, Yifei and Zhan, Kun and Jia, Peng and Lang, Xianpeng and Wang, Xingang and Mei, Wenjun}, title = {{ReconDreamer}: Crafting World Models for Driving Scene Reconstruction via Online Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1559--1569} }
GenFusion: Closing the Loop between Reconstruction and Generation via Videos: Sibo Wu,

Congrong Xu,

Binbin Huang,

Andreas Geiger,

Anpei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR, author = {Wu, Sibo and Xu, Congrong and Huang, Binbin and Geiger, Andreas and Chen, Anpei}, title = {{GenFusion}: Closing the Loop between Reconstruction and Generation via Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6078--6088} }
The PanAf-FGBG Dataset: Understanding the Impact of Backgrounds in Wildlife Behaviour Recognition: Otto Brookes,

Maksim Kukushkin,

Majid Mirmehdi,

Colleen Stephens,

Paula Dieguez,

Thurston C. Hicks,

Sorrel Jones,

Kevin Lee,

Maureen S. McCarthy,

Amelia Meier,

Emmanuelle Normand,

Erin G. Wessling,

Roman M. Wittig,

Kevin Langergraber,

Klaus Zuberbühler,

Lukas Boesch,

Thomas Schmid,

Mimi Arandjelovic,

Hjalmar Kühl,

Tilo Burghardt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Brookes_2025_CVPR, author = {Brookes, Otto and Kukushkin, Maksim and Mirmehdi, Majid and Stephens, Colleen and Dieguez, Paula and Hicks, Thurston C. and Jones, Sorrel and Lee, Kevin and McCarthy, Maureen S. and Meier, Amelia and Normand, Emmanuelle and Wessling, Erin G. and Wittig, Roman M. and Langergraber, Kevin and Zuberb{\"u}hler, Klaus and Boesch, Lukas and Schmid, Thomas and Arandjelovic, Mimi and K{\"u}hl, Hjalmar and Burghardt, Tilo}, title = {The {PanAf-FGBG} Dataset: Understanding the Impact of Backgrounds in Wildlife Behaviour Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5433--5443} }
Sonic: Shifting Focus to Global Audio Perception in Portrait Animation: Xiaozhong Ji,

Xiaobin Hu,

Zhihong Xu,

Junwei Zhu,

Chuming Lin,

Qingdong He,

Jiangning Zhang,

Donghao Luo,

Yi Chen,

Qin Lin,

Qinglin Lu,

Chengjie Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Ji_2025_CVPR, author = {Ji, Xiaozhong and Hu, Xiaobin and Xu, Zhihong and Zhu, Junwei and Lin, Chuming and He, Qingdong and Zhang, Jiangning and Luo, Donghao and Chen, Yi and Lin, Qin and Lu, Qinglin and Wang, Chengjie}, title = {Sonic: Shifting Focus to Global Audio Perception in Portrait Animation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {193--203} }
Multitwine: Multi-Object Compositing with Text and Layout Control: Gemma Canet Tarrés,

Zhe Lin,

Zhifei Zhang,

He Zhang,

Andrew Gilbert,

John Collomosse,

Soo Ye Kim; [pdf] [supp]
[bibtex]
@InProceedings{Tarres_2025_CVPR, author = {Tarr{\'e}s, Gemma Canet and Lin, Zhe and Zhang, Zhifei and Zhang, He and Gilbert, Andrew and Collomosse, John and Kim, Soo Ye}, title = {Multitwine: Multi-Object Compositing with Text and Layout Control}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8094--8104} }
Video Depth without Video Models: Bingxin Ke,

Dominik Narnhofer,

Shengyu Huang,

Lei Ke,

Torben Peters,

Katerina Fragkiadaki,

Anton Obukhov,

Konrad Schindler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2025_CVPR, author = {Ke, Bingxin and Narnhofer, Dominik and Huang, Shengyu and Ke, Lei and Peters, Torben and Fragkiadaki, Katerina and Obukhov, Anton and Schindler, Konrad}, title = {Video Depth without Video Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7233--7243} }
PointLoRA: Low-Rank Adaptation with Token Selection for Point Cloud Learning: Song Wang,

Xiaolu Liu,

Lingdong Kong,

Jianyun Xu,

Chunyong Hu,

Gongfan Fang,

Wentong Li,

Jianke Zhu,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Song and Liu, Xiaolu and Kong, Lingdong and Xu, Jianyun and Hu, Chunyong and Fang, Gongfan and Li, Wentong and Zhu, Jianke and Wang, Xinchao}, title = {{PointLoRA}: Low-Rank Adaptation with Token Selection for Point Cloud Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6605--6615} }
HumanRig: Learning Automatic Rigging for Humanoid Character in a Large Scale Dataset: Zedong Chu,

Feng Xiong,

Meiduo Liu,

Jinzhi Zhang,

Mingqi Shao,

Zhaoxu Sun,

Di Wang,

Mu Xu; [pdf] [arXiv]
[bibtex]
@InProceedings{Chu_2025_CVPR, author = {Chu, Zedong and Xiong, Feng and Liu, Meiduo and Zhang, Jinzhi and Shao, Mingqi and Sun, Zhaoxu and Wang, Di and Xu, Mu}, title = {{HumanRig}: Learning Automatic Rigging for Humanoid Character in a Large Scale Dataset}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {304--313} }
GaussHDR: High Dynamic Range Gaussian Splatting via Learning Unified 3D and 2D Local Tone Mapping: Jinfeng Liu,

Lingtong Kong,

Bo Li,

Dan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Jinfeng and Kong, Lingtong and Li, Bo and Xu, Dan}, title = {{GaussHDR}: High Dynamic Range {Gaussian} Splatting via Learning Unified {3D} and {2D} Local Tone Mapping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5991--6000} }
Channel-wise Noise Scheduled Diffusion for Inverse Rendering in Indoor Scenes: JunYong Choi,

Min-cheol Sagong,

SeokYeong Lee,

Seung-Won Jung,

Ig-Jae Kim,

Junghyun Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2025_CVPR, author = {Choi, JunYong and Sagong, Min-cheol and Lee, SeokYeong and Jung, Seung-Won and Kim, Ig-Jae and Cho, Junghyun}, title = {Channel-wise Noise Scheduled Diffusion for Inverse Rendering in Indoor Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5773--5782} }
Targeted Forgetting of Image Subgroups in CLIP Models: Zeliang Zhang,

Gaowen Liu,

Charles Fleming,

Ramana Rao Kompella,

Chenliang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zeliang and Liu, Gaowen and Fleming, Charles and Kompella, Ramana Rao and Xu, Chenliang}, title = {Targeted Forgetting of Image Subgroups in {CLIP} Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9870--9880} }
SeqAfford: Sequential 3D Affordance Reasoning via Multimodal Large Language Model: Chunlin Yu,

Hanqing Wang,

Ye Shi,

Haoyang Luo,

Sibei Yang,

Jingyi Yu,

Jingya Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR, author = {Yu, Chunlin and Wang, Hanqing and Shi, Ye and Luo, Haoyang and Yang, Sibei and Yu, Jingyi and Wang, Jingya}, title = {{SeqAfford}: Sequential {3D} Affordance Reasoning via Multimodal Large Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1691--1701} }
DSV-LFS: Unifying LLM-Driven Semantic Cues with Visual Features for Robust Few-Shot Segmentation: Amin Karimi,

Charalambos Poullis; [pdf] [supp]
[bibtex]
@InProceedings{Karimi_2025_CVPR, author = {Karimi, Amin and Poullis, Charalambos}, title = {{DSV-LFS}: Unifying {LLM}-Driven Semantic Cues with Visual Features for Robust Few-Shot Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4584--4594} }
SemAlign3D: Semantic Correspondence between RGB-Images through Aligning 3D Object-Class Representations: Krispin Wandel,

Hesheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wandel_2025_CVPR, author = {Wandel, Krispin and Wang, Hesheng}, title = {{SemAlign3D}: Semantic Correspondence between {RGB}-Images through Aligning {3D} Object-Class Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1138--1147} }
DPU: Dynamic Prototype Updating for Multimodal Out-of-Distribution Detection: Shawn Li,

Huixian Gong,

Hao Dong,

Tiankai Yang,

Zhengzhong Tu,

Yue Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Shawn and Gong, Huixian and Dong, Hao and Yang, Tiankai and Tu, Zhengzhong and Zhao, Yue}, title = {{DPU}: Dynamic Prototype Updating for Multimodal Out-of-Distribution Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10193--10202} }
Towards Unbiased and Robust Spatio-Temporal Scene Graph Generation and Anticipation: Rohith Peddi,

Saurabh Saurabh,

Ayush Abhay Shrivastava,

Parag Singla,

Vibhav Gogate; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peddi_2025_CVPR, author = {Peddi, Rohith and Saurabh, Saurabh and Shrivastava, Ayush Abhay and Singla, Parag and Gogate, Vibhav}, title = {Towards Unbiased and Robust Spatio-Temporal Scene Graph Generation and Anticipation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8648--8657} }
Boosting Domain Incremental Learning: Selecting the Optimal Parameters is All You Need: Qiang Wang,

Xiang Song,

Yuhang He,

Jizhou Han,

Chenhao Ding,

Xinyuan Gao,

Yihong Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Qiang and Song, Xiang and He, Yuhang and Han, Jizhou and Ding, Chenhao and Gao, Xinyuan and Gong, Yihong}, title = {Boosting Domain Incremental Learning: Selecting the Optimal Parameters is All You Need}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4839--4849} }
Perceptual Inductive Bias Is What You Need Before Contrastive Learning: Junru Zhao,

Tianqin Li,

Dunhan Jiang,

Shenghao Wu,

Alan Ramirez,

Tai Sing Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR, author = {Zhao, Junru and Li, Tianqin and Jiang, Dunhan and Wu, Shenghao and Ramirez, Alan and Lee, Tai Sing}, title = {Perceptual Inductive Bias Is What You Need Before Contrastive Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9621--9630} }
FaceBench: A Multi-View Multi-Level Facial Attribute VQA Dataset for Benchmarking Face Perception MLLMs: Xiaoqin Wang,

Xusen Ma,

Xianxu Hou,

Meidan Ding,

Yudong Li,

Junliang Chen,

Wenting Chen,

Xiaoyang Peng,

Linlin Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Xiaoqin and Ma, Xusen and Hou, Xianxu and Ding, Meidan and Li, Yudong and Chen, Junliang and Chen, Wenting and Peng, Xiaoyang and Shen, Linlin}, title = {{FaceBench}: A Multi-View Multi-Level Facial Attribute {VQA} Dataset for Benchmarking Face Perception {MLLMs}}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9154--9164} }
EffiDec3D: An Optimized Decoder for High-Performance and Efficient 3D Medical Image Segmentation: Md Mostafijur Rahman,

Radu Marculescu; [pdf] [supp]
[bibtex]
@InProceedings{Rahman_2025_CVPR, author = {Rahman, Md Mostafijur and Marculescu, Radu}, title = {{EffiDec3D}: An Optimized Decoder for High-Performance and Efficient {3D} Medical Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10435--10444} }
Exploring Historical Information for RGBE Visual Tracking with Mamba: Chuanyu Sun,

Jiqing Zhang,

Yang Wang,

Huilin Ge,

Qianchen Xia,

Baocai Yin,

Xin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR, author = {Sun, Chuanyu and Zhang, Jiqing and Wang, Yang and Ge, Huilin and Xia, Qianchen and Yin, Baocai and Yang, Xin}, title = {Exploring Historical Information for {RGBE} Visual Tracking with {Mamba}}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6500--6509} }
ArtiScene: Language-Driven Artistic 3D Scene Generation Through Image Intermediary: Zeqi Gu,

Yin Cui,

Zhaoshuo Li,

Fangyin Wei,

Yunhao Ge,

Jinwei Gu,

Ming-Yu Liu,

Abe Davis,

Yifan Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2025_CVPR, author = {Gu, Zeqi and Cui, Yin and Li, Zhaoshuo and Wei, Fangyin and Ge, Yunhao and Gu, Jinwei and Liu, Ming-Yu and Davis, Abe and Ding, Yifan}, title = {{ArtiScene}: Language-Driven Artistic {3D} Scene Generation Through Image Intermediary}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2891--2901} }
Improving Sound Source Localization with Joint Slot Attention on Image and Audio: Inho Kim,

Youngkil Song,

Jicheol Park,

Won Hwa Kim,

Suha Kwak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR, author = {Kim, Inho and Song, Youngkil and Park, Jicheol and Kim, Won Hwa and Kwak, Suha}, title = {Improving Sound Source Localization with Joint Slot Attention on Image and Audio}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {3121--3130} }
Feature-Preserving Mesh Decimation for Normal Integration: Moritz Heep,

Sven Behnke,

Eduard Zell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Heep_2025_CVPR, author = {Heep, Moritz and Behnke, Sven and Zell, Eduard}, title = {Feature-Preserving Mesh Decimation for Normal Integration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5783--5792} }
DexHandDiff: Interaction-aware Diffusion Planning for Adaptive Dexterous Manipulation: Zhixuan Liang,

Yao Mu,

Yixiao Wang,

Tianxing Chen,

Wenqi Shao,

Wei Zhan,

Masayoshi Tomizuka,

Ping Luo,

Mingyu Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR, author = {Liang, Zhixuan and Mu, Yao and Wang, Yixiao and Chen, Tianxing and Shao, Wenqi and Zhan, Wei and Tomizuka, Masayoshi and Luo, Ping and Ding, Mingyu}, title = {{DexHandDiff}: Interaction-aware Diffusion Planning for Adaptive Dexterous Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1745--1755} }
VerbDiff: Text-Only Diffusion Models with Enhanced Interaction Awareness: SeungJu Cha,

Kwanyoung Lee,

Ye-Chan Kim,

Hyunwoo Oh,

Dong-Jin Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cha_2025_CVPR, author = {Cha, SeungJu and Lee, Kwanyoung and Kim, Ye-Chan and Oh, Hyunwoo and Kim, Dong-Jin}, title = {{VerbDiff}: Text-Only Diffusion Models with Enhanced Interaction Awareness}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8041--8050} }
Memories of Forgotten Concepts: Matan Rusanovsky,

Shimon Malnick,

Amir Jevnisek,

Ohad Fried,

Shai Avidan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rusanovsky_2025_CVPR, author = {Rusanovsky, Matan and Malnick, Shimon and Jevnisek, Amir and Fried, Ohad and Avidan, Shai}, title = {Memories of Forgotten Concepts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2966--2975} }
PSBD: Prediction Shift Uncertainty Unlocks Backdoor Detection: Wei Li,

Pin-Yu Chen,

Sijia Liu,

Ren Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Wei and Chen, Pin-Yu and Liu, Sijia and Wang, Ren}, title = {{PSBD}: Prediction Shift Uncertainty Unlocks Backdoor Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10255--10264} }
Phoenix: A Motion-based Self-Reflection Framework for Fine-grained Robotic Action Correction: Wenke Xia,

Ruoxuan Feng,

Dong Wang,

Di Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_CVPR, author = {Xia, Wenke and Feng, Ruoxuan and Wang, Dong and Hu, Di}, title = {Phoenix: A Motion-based Self-Reflection Framework for Fine-grained Robotic Action Correction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6981--6990} }
Multirate Neural Image Compression with Adaptive Lattice Vector Quantization: Hao Xu,

Xiaolin Wu,

Xi Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR, author = {Xu, Hao and Wu, Xiaolin and Zhang, Xi}, title = {Multirate Neural Image Compression with Adaptive Lattice Vector Quantization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7633--7642} }
EventFly: Event Camera Perception from Ground to the Sky: Lingdong Kong,

Dongyue Lu,

Xiang Xu,

Lai Xing Ng,

Wei Tsang Ooi,

Benoit R. Cottereau; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2025_CVPR, author = {Kong, Lingdong and Lu, Dongyue and Xu, Xiang and Ng, Lai Xing and Ooi, Wei Tsang and Cottereau, Benoit R.}, title = {{EventFly}: Event Camera Perception from Ground to the Sky}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1472--1484} }
CH3Depth: Efficient and Flexible Depth Foundation Model with Flow Matching: Jiaqi Li,

Yiran Wang,

Jinghong Zheng,

Junrui Zhang,

Liao Shen,

Tianqi Liu,

Zhiguo Cao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Jiaqi and Wang, Yiran and Zheng, Jinghong and Zhang, Junrui and Shen, Liao and Liu, Tianqi and Cao, Zhiguo}, title = {{CH3Depth}: Efficient and Flexible Depth Foundation Model with Flow Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7222--7232} }
Pow3R: Empowering Unconstrained 3D Reconstruction with Camera and Scene Priors: Wonbong Jang,

Philippe Weinzaepfel,

Vincent Leroy,

Lourdes Agapito,

Jerome Revaud; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2025_CVPR, author = {Jang, Wonbong and Weinzaepfel, Philippe and Leroy, Vincent and Agapito, Lourdes and Revaud, Jerome}, title = {{Pow3R}: Empowering Unconstrained {3D} Reconstruction with Camera and Scene Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1071--1081} }
MambaVLT: Time-Evolving Multimodal State Space Model for Vision-Language Tracking: Xinqi Liu,

Li Zhou,

Zikun Zhou,

Jianqiu Chen,

Zhenyu He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Xinqi and Zhou, Li and Zhou, Zikun and Chen, Jianqiu and He, Zhenyu}, title = {{MambaVLT}: Time-Evolving Multimodal State Space Model for Vision-Language Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8731--8741} }
Adaptive Parameter Selection for Tuning Vision-Language Models: Yi Zhang,

Yi-Xuan Deng,

Meng-Hao Guo,

Shi-Min Hu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yi and Deng, Yi-Xuan and Guo, Meng-Hao and Hu, Shi-Min}, title = {Adaptive Parameter Selection for Tuning Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4280--4290} }
Learning-enabled Polynomial Lyapunov Function Synthesis via High-Accuracy Counterexample-Guided Framework: Hanrui Zhao,

Niuniu Qi,

Mengxin Ren,

Banglong Liu,

Shuming Shi,

Zhengfeng Yang; [pdf]
[bibtex]
@InProceedings{Zhao_2025_CVPR, author = {Zhao, Hanrui and Qi, Niuniu and Ren, Mengxin and Liu, Banglong and Shi, Shuming and Yang, Zhengfeng}, title = {Learning-enabled Polynomial {Lyapunov} Function Synthesis via High-Accuracy Counterexample-Guided Framework}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10275--10284} }
Patient-Level Anatomy Meets Scanning-Level Physics: Personalized Federated Low-Dose CT Denoising Empowered by Large Language Model: Ziyuan Yang,

Yingyu Chen,

Zhiwen Wang,

Hongming Shan,

Yang Chen,

Yi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR, author = {Yang, Ziyuan and Chen, Yingyu and Wang, Zhiwen and Shan, Hongming and Chen, Yang and Zhang, Yi}, title = {Patient-Level Anatomy Meets Scanning-Level Physics: Personalized Federated Low-Dose {CT} Denoising Empowered by Large Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5154--5163} }
Exploiting Deblurring Networks for Radiance Fields: Haeyun Choi,

Heemin Yang,

Janghyeok Han,

Sunghyun Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2025_CVPR, author = {Choi, Haeyun and Yang, Heemin and Han, Janghyeok and Cho, Sunghyun}, title = {Exploiting Deblurring Networks for Radiance Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6012--6021} }
Rethinking Lanes and Points in Complex Scenarios for Monocular 3D Lane Detection: Yifan Chang,

Junjie Huang,

Xiaofeng Wang,

Yun Ye,

Zhujin Liang,

Yi Shan,

Dalong Du,

Xingang Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Chang_2025_CVPR, author = {Chang, Yifan and Huang, Junjie and Wang, Xiaofeng and Ye, Yun and Liang, Zhujin and Shan, Yi and Du, Dalong and Wang, Xingang}, title = {Rethinking Lanes and Points in Complex Scenarios for Monocular {3D} Lane Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6802--6811} }
PSA-SSL: Pose and Size-aware Self-Supervised Learning on LiDAR Point Clouds: Barza Nisar,

Steven L. Waslander; [pdf] [supp]
[bibtex]
@InProceedings{Nisar_2025_CVPR, author = {Nisar, Barza and Waslander, Steven L.}, title = {{PSA-SSL}: Pose and Size-aware Self-Supervised Learning on {LiDAR} Point Clouds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6670--6679} }
SnapGen: Taming High-Resolution Text-to-Image Models for Mobile Devices with Efficient Architectures and Training: Jierun Chen,

Dongting Hu,

Xijie Huang,

Huseyin Coskun,

Arpit Sahni,

Aarush Gupta,

Anujraaj Goyal,

Dishani Lahiri,

Rajesh Singh,

Yerlan Idelbayev,

Junli Cao,

Yanyu Li,

Kwang-Ting Cheng,

S.-H. Gary Chan,

Mingming Gong,

Sergey Tulyakov,

Anil Kag,

Yanwu Xu,

Jian Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jierun and Hu, Dongting and Huang, Xijie and Coskun, Huseyin and Sahni, Arpit and Gupta, Aarush and Goyal, Anujraaj and Lahiri, Dishani and Singh, Rajesh and Idelbayev, Yerlan and Cao, Junli and Li, Yanyu and Cheng, Kwang-Ting and Chan, S.-H. Gary and Gong, Mingming and Tulyakov, Sergey and Kag, Anil and Xu, Yanwu and Ren, Jian},
  title     = {{SnapGen}: Taming High-Resolution Text-to-Image Models for Mobile Devices with Efficient Architectures and Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7997--8008},
}
Community Forensics: Using Thousands of Generators to Train Fake Image Detectors: Jeongsoo Park,

Andrew Owens; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Jeongsoo and Owens, Andrew},
  title     = {Community Forensics: Using Thousands of Generators to Train Fake Image Detectors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8245--8257},
}
ModeSeq: Taming Sparse Multimodal Motion Prediction with Sequential Mode Modeling: Zikang Zhou,

Hengjian Zhou,

Haibo Hu,

Zihao Wen,

Jianping Wang,

Yung-Hui Li,

Yu-Kai Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Zikang and Zhou, Hengjian and Hu, Haibo and Wen, Zihao and Wang, Jianping and Li, Yung-Hui and Huang, Yu-Kai},
  title     = {{ModeSeq}: Taming Sparse Multimodal Motion Prediction with Sequential Mode Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1612--1621},
}
Quaffure: Real-Time Quasi-Static Neural Hair Simulation: Tuur Stuyck,

Gene Wei-Chin Lin,

Egor Larionov,

Hsiao-yu Chen,

Aljaz Bozic,

Nikolaos Sarafianos,

Doug Roble; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Stuyck_2025_CVPR,
  author    = {Stuyck, Tuur and Lin, Gene Wei-Chin and Larionov, Egor and Chen, Hsiao-yu and Bozic, Aljaz and Sarafianos, Nikolaos and Roble, Doug},
  title     = {Quaffure: Real-Time Quasi-Static Neural Hair Simulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {239--249},
}
LumiNet: Latent Intrinsics Meets Diffusion Models for Indoor Scene Relighting: Xiaoyan Xing,

Konrad Groh,

Sezer Karaoglu,

Theo Gevers,

Anand Bhattad; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Xiaoyan and Groh, Konrad and Karaoglu, Sezer and Gevers, Theo and Bhattad, Anand},
  title     = {{LumiNet}: Latent Intrinsics Meets Diffusion Models for Indoor Scene Relighting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {442--452},
}
DiC: Rethinking Conv3x3 Designs in Diffusion Models: Yuchuan Tian,

Jing Han,

Chengcheng Wang,

Yuchen Liang,

Chao Xu,

Hanting Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Yuchuan and Han, Jing and Wang, Chengcheng and Liang, Yuchen and Xu, Chao and Chen, Hanting},
  title     = {{DiC}: Rethinking {Conv3x3} Designs in Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2469--2478},
}
MoSca: Dynamic Gaussian Fusion from Casual Videos via 4D Motion Scaffolds: Jiahui Lei,

Yijia Weng,

Adam W. Harley,

Leonidas Guibas,

Kostas Daniilidis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2025_CVPR,
  author    = {Lei, Jiahui and Weng, Yijia and Harley, Adam W. and Guibas, Leonidas and Daniilidis, Kostas},
  title     = {{MoSca}: Dynamic {Gaussian} Fusion from Casual Videos via {4D} Motion Scaffolds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6165--6177},
}
Not Just Text: Uncovering Vision Modality Typographic Threats in Image Generation Models: Hao Cheng,

Erjia Xiao,

Jiayan Yang,

Jiahang Cao,

Qiang Zhang,

Jize Zhang,

Kaidi Xu,

Jindong Gu,

Renjing Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Hao and Xiao, Erjia and Yang, Jiayan and Cao, Jiahang and Zhang, Qiang and Zhang, Jize and Xu, Kaidi and Gu, Jindong and Xu, Renjing},
  title     = {Not Just Text: Uncovering Vision Modality Typographic Threats in Image Generation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2997--3007},
}
TokenHSI: Unified Synthesis of Physical Human-Scene Interactions through Task Tokenization: Liang Pan,

Zeshi Yang,

Zhiyang Dou,

Wenjia Wang,

Buzhen Huang,

Bo Dai,

Taku Komura,

Jingbo Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2025_CVPR,
  author    = {Pan, Liang and Yang, Zeshi and Dou, Zhiyang and Wang, Wenjia and Huang, Buzhen and Dai, Bo and Komura, Taku and Wang, Jingbo},
  title     = {{TokenHSI}: Unified Synthesis of Physical Human-Scene Interactions through Task Tokenization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5379--5391},
}
ShapeShifter: 3D Variations Using Multiscale and Sparse Point-Voxel Diffusion: Nissim Maruani,

Wang Yifan,

Matthew Fisher,

Pierre Alliez,

Mathieu Desbrun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Maruani_2025_CVPR,
  author    = {Maruani, Nissim and Yifan, Wang and Fisher, Matthew and Alliez, Pierre and Desbrun, Mathieu},
  title     = {{ShapeShifter}: {3D} Variations Using Multiscale and Sparse Point-Voxel Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {605--617},
}
FRESA: Feedforward Reconstruction of Personalized Skinned Avatars from Few Images: Rong Wang,

Fabian Prada,

Ziyan Wang,

Zhongshi Jiang,

Chengxiang Yin,

Junxuan Li,

Shunsuke Saito,

Igor Santesteban,

Javier Romero,

Rohan Joshi,

Hongdong Li,

Jason Saragih,

Yaser Sheikh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Rong and Prada, Fabian and Wang, Ziyan and Jiang, Zhongshi and Yin, Chengxiang and Li, Junxuan and Saito, Shunsuke and Santesteban, Igor and Romero, Javier and Joshi, Rohan and Li, Hongdong and Saragih, Jason and Sheikh, Yaser},
  title     = {{FRESA}: Feedforward Reconstruction of Personalized Skinned Avatars from Few Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {281--291},
}
ReasonGrounder: LVLM-Guided Hierarchical Feature Splatting for Open-Vocabulary 3D Visual Grounding and Reasoning: Zhenyang Liu,

Yikai Wang,

Sixiao Zheng,

Tongying Pan,

Longfei Liang,

Yanwei Fu,

Xiangyang Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhenyang and Wang, Yikai and Zheng, Sixiao and Pan, Tongying and Liang, Longfei and Fu, Yanwei and Xue, Xiangyang},
  title     = {{ReasonGrounder}: {LVLM}-Guided Hierarchical Feature Splatting for Open-Vocabulary {3D} Visual Grounding and Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3718--3727},
}
WonderWorld: Interactive 3D Scene Generation from a Single Image: Hong-Xing Yu,

Haoyi Duan,

Charles Herrmann,

William T. Freeman,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Hong-Xing and Duan, Haoyi and Herrmann, Charles and Freeman, William T. and Wu, Jiajun},
  title     = {{WonderWorld}: Interactive {3D} Scene Generation from a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5916--5926},
}
A Lightweight UDF Learning Framework for 3D Reconstruction Based on Local Shape Functions: Jiangbei Hu,

Yanggeng Li,

Fei Hou,

Junhui Hou,

Zhebin Zhang,

Shengfa Wang,

Na Lei,

Ying He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Jiangbei and Li, Yanggeng and Hou, Fei and Hou, Junhui and Zhang, Zhebin and Wang, Shengfa and Lei, Na and He, Ying},
  title     = {A Lightweight {UDF} Learning Framework for {3D} Reconstruction Based on Local Shape Functions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1297--1307},
}
DiffCAM: Data-Driven Saliency Maps by Capturing Feature Differences: Xingjian Li,

Qiming Zhao,

Neelesh Bisht,

Mostofa Rafid Uddin,

Jin Yu Kim,

Bryan Zhang,

Min Xu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xingjian and Zhao, Qiming and Bisht, Neelesh and Uddin, Mostofa Rafid and Kim, Jin Yu and Zhang, Bryan and Xu, Min},
  title     = {{DiffCAM}: Data-Driven Saliency Maps by Capturing Feature Differences},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10327--10337},
}
From Sparse Signal to Smooth Motion: Real-Time Motion Generation with Rolling Prediction Models: German Barquero,

Nadine Bertsch,

Manojkumar Marramreddy,

Carlos Chacón,

Filippo Arcadu,

Ferran Rigual,

Nicky Sijia He,

Cristina Palmero,

Sergio Escalera,

Yuting Ye,

Robin Kips; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Barquero_2025_CVPR,
  author    = {Barquero, German and Bertsch, Nadine and Marramreddy, Manojkumar and Chac{\'o}n, Carlos and Arcadu, Filippo and Rigual, Ferran and He, Nicky Sijia and Palmero, Cristina and Escalera, Sergio and Ye, Yuting and Kips, Robin},
  title     = {From Sparse Signal to Smooth Motion: Real-Time Motion Generation with Rolling Prediction Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1850--1860},
}
Imagine and Seek: Improving Composed Image Retrieval with an Imagined Proxy: You Li,

Fan Ma,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, You and Ma, Fan and Yang, Yi},
  title     = {Imagine and Seek: Improving Composed Image Retrieval with an Imagined Proxy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3984--3993},
}
EMOVA: Empowering Language Models to See, Hear and Speak with Vivid Emotions: Kai Chen,

Yunhao Gou,

Runhui Huang,

Zhili Liu,

Daxin Tan,

Jing Xu,

Chunwei Wang,

Yi Zhu,

Yihan Zeng,

Kuo Yang,

Dingdong Wang,

Kun Xiang,

Haoyuan Li,

Haoli Bai,

Jianhua Han,

Xiaohui Li,

Weike Jin,

Nian Xie,

Yu Zhang,

James T. Kwok,

Hengshuang Zhao,

Xiaodan Liang,

Dit-Yan Yeung,

Xiao Chen,

Zhenguo Li,

Wei Zhang,

Qun Liu,

Lanqing Hong,

Lu Hou,

Hang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Kai and Gou, Yunhao and Huang, Runhui and Liu, Zhili and Tan, Daxin and Xu, Jing and Wang, Chunwei and Zhu, Yi and Zeng, Yihan and Yang, Kuo and Wang, Dingdong and Xiang, Kun and Li, Haoyuan and Bai, Haoli and Han, Jianhua and Li, Xiaohui and Jin, Weike and Xie, Nian and Zhang, Yu and Kwok, James T. and Zhao, Hengshuang and Liang, Xiaodan and Yeung, Dit-Yan and Chen, Xiao and Li, Zhenguo and Zhang, Wei and Liu, Qun and Hong, Lanqing and Hou, Lu and Xu, Hang},
  title     = {{EMOVA}: Empowering Language Models to See, Hear and Speak with Vivid Emotions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5455--5466},
}
Reversing Flow for Image Restoration: Haina Qin,

Wenyang Luo,

Libin Wang,

Dandan Zheng,

Jingdong Chen,

Ming Yang,

Bing Li,

Weiming Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2025_CVPR,
  author    = {Qin, Haina and Luo, Wenyang and Wang, Libin and Zheng, Dandan and Chen, Jingdong and Yang, Ming and Li, Bing and Hu, Weiming},
  title     = {Reversing Flow for Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7545--7558},
}
Shadow Generation Using Diffusion Model with Geometry Prior: Haonan Zhao,

Qingyang Liu,

Xinhao Tao,

Li Niu,

Guangtao Zhai; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Haonan and Liu, Qingyang and Tao, Xinhao and Niu, Li and Zhai, Guangtao},
  title     = {Shadow Generation Using Diffusion Model with Geometry Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7603--7612},
}
Rethinking Epistemic and Aleatoric Uncertainty for Active Open-Set Annotation: An Energy-Based Approach: Chen-Chen Zong,

Sheng-Jun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zong_2025_CVPR,
  author    = {Zong, Chen-Chen and Huang, Sheng-Jun},
  title     = {Rethinking Epistemic and Aleatoric Uncertainty for Active Open-Set Annotation: An Energy-Based Approach},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10153--10162},
}
Any3DIS: Class-Agnostic 3D Instance Segmentation by 2D Mask Tracking: Phuc Nguyen,

Minh Luu,

Anh Tran,

Cuong Pham,

Khoi Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Phuc and Luu, Minh and Tran, Anh and Pham, Cuong and Nguyen, Khoi},
  title     = {{Any3DIS}: Class-Agnostic {3D} Instance Segmentation by {2D} Mask Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3636--3645},
}
FDS: Frequency-Aware Denoising Score for Text-Guided Latent Diffusion Image Editing: Yufan Ren,

Zicong Jiang,

Tong Zhang,

Søren Forchhammer,

Sabine Süsstrunk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Yufan and Jiang, Zicong and Zhang, Tong and Forchhammer, S{\o}ren and S{\"u}sstrunk, Sabine},
  title     = {{FDS}: Frequency-Aware Denoising Score for Text-Guided Latent Diffusion Image Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2651--2660},
}
MMAR: Towards Lossless Multi-Modal Auto-Regressive Probabilistic Modeling: Jian Yang,

Dacheng Yin,

Yizhou Zhou,

Fengyun Rao,

Wei Zhai,

Yang Cao,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jian and Yin, Dacheng and Zhou, Yizhou and Rao, Fengyun and Zhai, Wei and Cao, Yang and Zha, Zheng-Jun},
  title     = {{MMAR}: Towards Lossless Multi-Modal Auto-Regressive Probabilistic Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7974--7985},
}
ROS-SAM: High-Quality Interactive Segmentation for Remote Sensing Moving Object: Zhe Shan,

Yang Liu,

Lei Zhou,

Cheng Yan,

Heng Wang,

Xia Xie; [pdf] [supp]
[bibtex]
@InProceedings{Shan_2025_CVPR,
  author    = {Shan, Zhe and Liu, Yang and Zhou, Lei and Yan, Cheng and Wang, Heng and Xie, Xia},
  title     = {{ROS-SAM}: High-Quality Interactive Segmentation for Remote Sensing Moving Object},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3625--3635},
}
MI-DETR: An Object Detection Model with Multi-time Inquiries Mechanism: Zhixiong Nan,

Xianghong Li,

Jifeng Dai,

Tao Xiang; [pdf] [supp]
[bibtex]
@InProceedings{Nan_2025_CVPR,
  author    = {Nan, Zhixiong and Li, Xianghong and Dai, Jifeng and Xiang, Tao},
  title     = {{MI-DETR}: An Object Detection Model with Multi-time Inquiries Mechanism},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4703--4712},
}
Synthetic Visual Genome: Jae Sung Park,

Zixian Ma,

Linjie Li,

Chenhao Zheng,

Cheng-Yu Hsieh,

Ximing Lu,

Khyathi Chandu,

Quan Kong,

Norimasa Kobori,

Ali Farhadi,

Yejin Choi,

Ranjay Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Jae Sung and Ma, Zixian and Li, Linjie and Zheng, Chenhao and Hsieh, Cheng-Yu and Lu, Ximing and Chandu, Khyathi and Kong, Quan and Kobori, Norimasa and Farhadi, Ali and Choi, Yejin and Krishna, Ranjay},
  title     = {Synthetic {Visual Genome}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9073--9086},
}
Stop Learning it all to Mitigate Visual Hallucination, Focus on the Hallucination Target.: Dokyoon Yoon,

Youngsook Song,

Woomyoung Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoon_2025_CVPR,
  author    = {Yoon, Dokyoon and Song, Youngsook and Park, Woomyoung},
  title     = {Stop Learning it all to Mitigate Visual Hallucination, Focus on the Hallucination Target.},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4200--4208},
}
Seeing the Abstract: Translating the Abstract Language for Vision Language Models: Davide Talon,

Federico Girella,

Ziyue Liu,

Marco Cristani,

Yiming Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Talon_2025_CVPR,
  author    = {Talon, Davide and Girella, Federico and Liu, Ziyue and Cristani, Marco and Wang, Yiming},
  title     = {Seeing the Abstract: Translating the Abstract Language for Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9253--9262},
}
One-Step Event-Driven High-Speed Autofocus: Yuhan Bao,

Shaohua Gao,

Wenyong Li,

Kaiwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bao_2025_CVPR,
  author    = {Bao, Yuhan and Gao, Shaohua and Li, Wenyong and Wang, Kaiwei},
  title     = {One-Step Event-Driven High-Speed Autofocus},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6222--6230},
}
PanDA: Towards Panoramic Depth Anything with Unlabeled Panoramas and Mobius Spatial Augmentation: Zidong Cao,

Jinjing Zhu,

Weiming Zhang,

Hao Ai,

Haotian Bai,

Hengshuang Zhao,

Lin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Zidong and Zhu, Jinjing and Zhang, Weiming and Ai, Hao and Bai, Haotian and Zhao, Hengshuang and Wang, Lin},
  title     = {{PanDA}: Towards Panoramic {Depth Anything} with Unlabeled Panoramas and {Mobius} Spatial Augmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {982--992},
}
Towards High-fidelity 3D Talking Avatar with Personalized Dynamic Texture: Xuanchen Li,

Jianyu Wang,

Yuhao Cheng,

Yikun Zeng,

Xingyu Ren,

Wenhan Zhu,

Weiming Zhao,

Yichao Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xuanchen and Wang, Jianyu and Cheng, Yuhao and Zeng, Yikun and Ren, Xingyu and Zhu, Wenhan and Zhao, Weiming and Yan, Yichao},
  title     = {Towards High-fidelity {3D} Talking Avatar with Personalized Dynamic Texture},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {204--214},
}
Scene Splatter: Momentum 3D Scene Generation from Single Image with Video Diffusion Model: Shengjun Zhang,

Jinzhao Li,

Xin Fei,

Hao Liu,

Yueqi Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Shengjun and Li, Jinzhao and Fei, Xin and Liu, Hao and Duan, Yueqi},
  title     = {Scene Splatter: Momentum {3D} Scene Generation from Single Image with Video Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6089--6098},
}
JiSAM: Alleviate Labeling Burden and Corner Case Problems in Autonomous Driving via Minimal Real-World Data: Runjian Chen,

Wenqi Shao,

Bo Zhang,

Shaoshuai Shi,

Li Jiang,

Ping Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Runjian and Shao, Wenqi and Zhang, Bo and Shi, Shaoshuai and Jiang, Li and Luo, Ping},
  title     = {{JiSAM}: Alleviate Labeling Burden and Corner Case Problems in Autonomous Driving via Minimal Real-World Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6792--6801},
}
OSMamba: Omnidirectional Spectral Mamba with Dual-Domain Prior Generator for Exposure Correction: Gehui Li,

Bin Chen,

Chen Zhao,

Lei Zhang,

Jian Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Gehui and Chen, Bin and Zhao, Chen and Zhang, Lei and Zhang, Jian},
  title     = {{OSMamba}: Omnidirectional Spectral {Mamba} with Dual-Domain Prior Generator for Exposure Correction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7480--7490},
}
RandAR: Decoder-only Autoregressive Visual Generation in Random Orders: Ziqi Pang,

Tianyuan Zhang,

Fujun Luan,

Yunze Man,

Hao Tan,

Kai Zhang,

William T. Freeman,

Yu-Xiong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pang_2025_CVPR,
  author    = {Pang, Ziqi and Zhang, Tianyuan and Luan, Fujun and Man, Yunze and Tan, Hao and Zhang, Kai and Freeman, William T. and Wang, Yu-Xiong},
  title     = {{RandAR}: Decoder-only Autoregressive Visual Generation in Random Orders},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {45--55},
}
Vid2Sim: Realistic and Interactive Simulation from Video for Urban Navigation: Ziyang Xie,

Zhizheng Liu,

Zhenghao Peng,

Wayne Wu,

Bolei Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Ziyang and Liu, Zhizheng and Peng, Zhenghao and Wu, Wayne and Zhou, Bolei},
  title     = {{Vid2Sim}: Realistic and Interactive Simulation from Video for Urban Navigation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1581--1591},
}
Type-R: Automatically Retouching Typos for Text-to-Image Generation: Wataru Shimoda,

Naoto Inoue,

Daichi Haraguchi,

Hayato Mitani,

Seiichi Uchida,

Kota Yamaguchi; [pdf] [supp]
[bibtex]
@InProceedings{Shimoda_2025_CVPR,
  author    = {Shimoda, Wataru and Inoue, Naoto and Haraguchi, Daichi and Mitani, Hayato and Uchida, Seiichi and Yamaguchi, Kota},
  title     = {{Type-R}: Automatically Retouching Typos for Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2745--2754},
}
Video-3D LLM: Learning Position-Aware Video Representation for 3D Scene Understanding: Duo Zheng,

Shijia Huang,

Liwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Duo and Huang, Shijia and Wang, Liwei},
  title     = {{Video-3D LLM}: Learning Position-Aware Video Representation for {3D} Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8995--9006},
}
Single Domain Generalization for Few-Shot Counting via Universal Representation Matching: Xianing Chen,

Si Huo,

Borui Jiang,

Hailin Hu,

Xinghao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Xianing and Huo, Si and Jiang, Borui and Hu, Hailin and Chen, Xinghao},
  title     = {Single Domain Generalization for Few-Shot Counting via Universal Representation Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4639--4649},
}
Discovering Hidden Visual Concepts Beyond Linguistic Input in Infant Learning: Xueyi Ke,

Satoshi Tsutsui,

Yayun Zhang,

Bihan Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2025_CVPR,
  author    = {Ke, Xueyi and Tsutsui, Satoshi and Zhang, Yayun and Wen, Bihan},
  title     = {Discovering Hidden Visual Concepts Beyond Linguistic Input in Infant Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4343--4352},
}
Do We Always Need the Simplicity Bias? Looking for Optimal Inductive Biases in the Wild: Damien Teney,

Liangze Jiang,

Florin Gogianu,

Ehsan Abbasnejad; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Teney_2025_CVPR,
  author    = {Teney, Damien and Jiang, Liangze and Gogianu, Florin and Abbasnejad, Ehsan},
  title     = {Do We Always Need the Simplicity Bias? {Looking} for Optimal Inductive Biases in the Wild},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {79--90},
}
A General Adaptive Dual-level Weighting Mechanism for Remote Sensing Pansharpening: Jie Huang,

Haorui Chen,

Jiaxuan Ren,

Siran Peng,

Liangjian Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Jie and Chen, Haorui and Ren, Jiaxuan and Peng, Siran and Deng, Liangjian},
  title     = {A General Adaptive Dual-level Weighting Mechanism for Remote Sensing Pansharpening},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7447--7456},
}
RASP: Revisiting 3D Anamorphic Art for Shadow-Guided Packing of Irregular Objects: Soumyaratna Debnath,

Ashish Tiwari,

Kaustubh Sadekar,

Shanmuganathan Raman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Debnath_2025_CVPR,
  author    = {Debnath, Soumyaratna and Tiwari, Ashish and Sadekar, Kaustubh and Raman, Shanmuganathan},
  title     = {{RASP}: Revisiting {3D} Anamorphic Art for Shadow-Guided Packing of Irregular Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5849--5858},
}
Diffusion Bridge: Leveraging Diffusion Model to Reduce the Modality Gap Between Text and Vision for Zero-Shot Image Captioning: Jeong Ryong Lee,

Yejee Shin,

Geonhui Son,

Dosik Hwang; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Jeong Ryong and Shin, Yejee and Son, Geonhui and Hwang, Dosik},
  title     = {Diffusion Bridge: Leveraging Diffusion Model to Reduce the Modality Gap Between Text and Vision for Zero-Shot Image Captioning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4050--4059},
}
MODfinity: Unsupervised Domain Adaptation with Multimodal Information Flow Intertwining: Shanglin Liu,

Jianming Lv,

Jingdan Kang,

Huaidong Zhang,

Zequan Liang,

Shengfeng He; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Shanglin and Lv, Jianming and Kang, Jingdan and Zhang, Huaidong and Liang, Zequan and He, Shengfeng},
  title     = {{MODfinity}: Unsupervised Domain Adaptation with Multimodal Information Flow Intertwining},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5092--5101},
}
Towards Universal Soccer Video Understanding: Jiayuan Rao,

Haoning Wu,

Hao Jiang,

Ya Zhang,

Yanfeng Wang,

Weidi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rao_2025_CVPR,
  author    = {Rao, Jiayuan and Wu, Haoning and Jiang, Hao and Zhang, Ya and Wang, Yanfeng and Xie, Weidi},
  title     = {Towards Universal Soccer Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8384--8394},
}
Enhancing Privacy-Utility Trade-offs to Mitigate Memorization in Diffusion Models: Chen Chen,

Daochang Liu,

Mubarak Shah,

Chang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Chen and Liu, Daochang and Shah, Mubarak and Xu, Chang},
  title     = {Enhancing Privacy-Utility Trade-offs to Mitigate Memorization in Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8182--8191},
}
NVComposer: Boosting Generative Novel View Synthesis with Multiple Sparse and Unposed Images: Lingen Li,

Zhaoyang Zhang,

Yaowei Li,

Jiale Xu,

Wenbo Hu,

Xiaoyu Li,

Weihao Cheng,

Jinwei Gu,

Tianfan Xue,

Ying Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Lingen and Zhang, Zhaoyang and Li, Yaowei and Xu, Jiale and Hu, Wenbo and Li, Xiaoyu and Cheng, Weihao and Gu, Jinwei and Xue, Tianfan and Shan, Ying},
  title     = {{NVComposer}: Boosting Generative Novel View Synthesis with Multiple Sparse and Unposed Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {777--787},
}
Efficient Personalization of Quantized Diffusion Model without Backpropagation: Hoigi Seo,

Wongi Jeong,

Kyungryeol Lee,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seo_2025_CVPR,
  author    = {Seo, Hoigi and Jeong, Wongi and Lee, Kyungryeol and Chun, Se Young},
  title     = {Efficient Personalization of Quantized Diffusion Model without Backpropagation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7717--7727},
}
Alias-Free Latent Diffusion Models: Improving Fractional Shift Equivariance of Diffusion Latent Space: Yifan Zhou,

Zeqi Xiao,

Shuai Yang,

Xingang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Yifan and Xiao, Zeqi and Yang, Shuai and Pan, Xingang},
  title     = {Alias-Free Latent Diffusion Models: Improving Fractional Shift Equivariance of Diffusion Latent Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {34--44},
}
KVQ: Boosting Video Quality Assessment via Saliency-guided Local Perception: Yunpeng Qu,

Kun Yuan,

Qizhi Xie,

Ming Sun,

Chao Zhou,

Jian Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_CVPR,
  author    = {Qu, Yunpeng and Yuan, Kun and Xie, Qizhi and Sun, Ming and Zhou, Chao and Wang, Jian},
  title     = {{KVQ}: Boosting Video Quality Assessment via Saliency-guided Local Perception},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2150--2160},
}
Learning Flow Fields in Attention for Controllable Person Image Generation: Zijian Zhou,

Shikun Liu,

Xiao Han,

Haozhe Liu,

Kam Woh Ng,

Tian Xie,

Yuren Cong,

Hang Li,

Mengmeng Xu,

Juan-Manuel Perez-Rua,

Aditya Patel,

Tao Xiang,

Miaojing Shi,

Sen He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Zijian and Liu, Shikun and Han, Xiao and Liu, Haozhe and Ng, Kam Woh and Xie, Tian and Cong, Yuren and Li, Hang and Xu, Mengmeng and Perez-Rua, Juan-Manuel and Patel, Aditya and Xiang, Tao and Shi, Miaojing and He, Sen},
  title     = {Learning Flow Fields in Attention for Controllable Person Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2491--2501},
}
Early-Bird Diffusion: Investigating and Leveraging Timestep-Aware Early-Bird Tickets in Diffusion Models for Efficient Training: Lexington Whalen,

Zhenbang Du,

Haoran You,

Chaojian Li,

Sixu Li,

Yingyan Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Whalen_2025_CVPR,
  author    = {Whalen, Lexington and Du, Zhenbang and You, Haoran and Li, Chaojian and Li, Sixu and Lin, Yingyan},
  title     = {Early-Bird Diffusion: Investigating and Leveraging Timestep-Aware Early-Bird Tickets in Diffusion Models for Efficient Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7675--7684},
}
DIV-FF: Dynamic Image-Video Feature Fields For Environment Understanding in Egocentric Videos: Lorenzo Mur-Labadia,

Josechu Guerrero,

Ruben Martinez-Cantin; [pdf] [supp]
[bibtex]
@InProceedings{Mur-Labadia_2025_CVPR,
  author    = {Mur-Labadia, Lorenzo and Guerrero, Josechu and Martinez-Cantin, Ruben},
  title     = {{DIV-FF}: Dynamic Image-Video Feature Fields For Environment Understanding in Egocentric Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3470--3480},
}
EfficientLLaVA: Generalizable Auto-Pruning for Large Vision-language Models: Yinan Liang,

Ziwei Wang,

Xiuwei Xu,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Yinan and Wang, Ziwei and Xu, Xiuwei and Zhou, Jie and Lu, Jiwen},
  title     = {{EfficientLLaVA}: Generalizable Auto-Pruning for Large Vision-language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9445--9454},
}
OnlineAnySeg: Online Zero-Shot 3D Segmentation by Visual Foundation Model Guided 2D Mask Merging: Yijie Tang,

Jiazhao Zhang,

Yuqing Lan,

Yulan Guo,

Dezun Dong,

Chenyang Zhu,

Kai Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Yijie and Zhang, Jiazhao and Lan, Yuqing and Guo, Yulan and Dong, Dezun and Zhu, Chenyang and Xu, Kai},
  title     = {{OnlineAnySeg}: Online Zero-Shot {3D} Segmentation by Visual Foundation Model Guided {2D} Mask Merging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3676--3685},
}
Tora: Trajectory-oriented Diffusion Transformer for Video Generation: Zhenghao Zhang,

Junchao Liao,

Menghao Li,

ZuoZhuo Dai,

Bingxue Qiu,

Siyu Zhu,

Long Qin,

Weizhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhenghao and Liao, Junchao and Li, Menghao and Dai, ZuoZhuo and Qiu, Bingxue and Zhu, Siyu and Qin, Long and Wang, Weizhi},
  title     = {Tora: Trajectory-oriented Diffusion Transformer for Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2063--2073},
}
Morpheus: Text-Driven 3D Gaussian Splat Shape and Color Stylization: Jamie Wynn,

Zawar Qureshi,

Jakub Powierza,

Jamie Watson,

Mohamed Sayed; [pdf] [arXiv]
[bibtex]
@InProceedings{Wynn_2025_CVPR,
  author    = {Wynn, Jamie and Qureshi, Zawar and Powierza, Jakub and Watson, Jamie and Sayed, Mohamed},
  title     = {Morpheus: Text-Driven {3D} {Gaussian} Splat Shape and Color Stylization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7825--7836},
}
Generalized Diffusion Detector: Mining Robust Features from Diffusion Models for Domain-Generalized Detection: Boyong He,

Yuxiang Ji,

Qianwen Ye,

Zhuoyue Tan,

Liaoni Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Boyong and Ji, Yuxiang and Ye, Qianwen and Tan, Zhuoyue and Wu, Liaoni},
  title     = {Generalized Diffusion Detector: Mining Robust Features from Diffusion Models for Domain-Generalized Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9921--9932},
}
Enhancing Dance-to-Music Generation via Negative Conditioning Latent Diffusion Model: Changchang Sun,

Gaowen Liu,

Charles Fleming,

Yan Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Changchang and Liu, Gaowen and Fleming, Charles and Yan, Yan},
  title     = {Enhancing Dance-to-Music Generation via Negative Conditioning Latent Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8321--8330},
}
Neuro-Symbolic Evaluation of Text-to-Video Models using Formal Verification: S P Sharan,

Minkyu Choi,

Sahil Shah,

Harsh Goel,

Mohammad Omama,

Sandeep Chinchali; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sharan_2025_CVPR,
  author    = {Sharan, S P and Choi, Minkyu and Shah, Sahil and Goel, Harsh and Omama, Mohammad and Chinchali, Sandeep},
  title     = {Neuro-Symbolic Evaluation of Text-to-Video Models using Formal Verification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8395--8405},
}
Spherical Manifold Guided Diffusion Model for Panoramic Image Generation: Xiancheng Sun,

Mai Xu,

Shengxi Li,

Senmao Ma,

Xin Deng,

Lai Jiang,

Gang Shen; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Xiancheng and Xu, Mai and Li, Shengxi and Ma, Senmao and Deng, Xin and Jiang, Lai and Shen, Gang},
  title     = {Spherical Manifold Guided Diffusion Model for Panoramic Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5824--5834},
}
Rethinking Query-based Transformer for Continual Image Segmentation: Yuchen Zhu,

Cheng Shi,

Dingyou Wang,

Jiajin Tang,

Zhengxuan Wei,

Yu Wu,

Guanbin Li,

Sibei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Yuchen and Shi, Cheng and Wang, Dingyou and Tang, Jiajin and Wei, Zhengxuan and Wu, Yu and Li, Guanbin and Yang, Sibei},
  title     = {Rethinking Query-based Transformer for Continual Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4595--4606},
}
SlideChat: A Large Vision-Language Assistant for Whole-Slide Pathology Image Understanding: Ying Chen,

Guoan Wang,

Yuanfeng Ji,

Yanjun Li,

Jin Ye,

Tianbin Li,

Ming Hu,

Rongshan Yu,

Yu Qiao,

Junjun He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Ying and Wang, Guoan and Ji, Yuanfeng and Li, Yanjun and Ye, Jin and Li, Tianbin and Hu, Ming and Yu, Rongshan and Qiao, Yu and He, Junjun},
  title     = {{SlideChat}: A Large Vision-Language Assistant for Whole-Slide Pathology Image Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5134--5143},
}
Text-guided Sparse Voxel Pruning for Efficient 3D Visual Grounding: Wenxuan Guo,

Xiuwei Xu,

Ziwei Wang,

Jianjiang Feng,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Wenxuan and Xu, Xiuwei and Wang, Ziwei and Feng, Jianjiang and Zhou, Jie and Lu, Jiwen},
  title     = {Text-guided Sparse Voxel Pruning for Efficient {3D} Visual Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3666--3675},
}
Common3D: Self-Supervised Learning of 3D Morphable Models for Common Objects in Neural Feature Space: Leonhard Sommer,

Olaf Dünkel,

Christian Theobalt,

Adam Kortylewski; [pdf] [supp]
[bibtex]
@InProceedings{Sommer_2025_CVPR,
  author    = {Sommer, Leonhard and D{\"u}nkel, Olaf and Theobalt, Christian and Kortylewski, Adam},
  title     = {{Common3D}: Self-Supervised Learning of {3D} Morphable Models for Common Objects in Neural Feature Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6468--6479},
}
ECVC: Exploiting Non-Local Correlations in Multiple Frames for Contextual Video Compression: Wei Jiang,

Junru Li,

Kai Zhang,

Li Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Wei and Li, Junru and Zhang, Kai and Zhang, Li},
  title     = {{ECVC}: Exploiting Non-Local Correlations in Multiple Frames for Contextual Video Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7331--7341},
}
LinGen: Towards High-Resolution Minute-Length Text-to-Video Generation with Linear Computational Complexity: Hongjie Wang,

Chih-Yao Ma,

Yen-Cheng Liu,

Ji Hou,

Tao Xu,

Jialiang Wang,

Felix Juefei-Xu,

Yaqiao Luo,

Peizhao Zhang,

Tingbo Hou,

Peter Vajda,

Niraj K. Jha,

Xiaoliang Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Hongjie and Ma, Chih-Yao and Liu, Yen-Cheng and Hou, Ji and Xu, Tao and Wang, Jialiang and Juefei-Xu, Felix and Luo, Yaqiao and Zhang, Peizhao and Hou, Tingbo and Vajda, Peter and Jha, Niraj K. and Dai, Xiaoliang},
  title     = {{LinGen}: Towards High-Resolution Minute-Length Text-to-Video Generation with Linear Computational Complexity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2578--2588},
}
Towards Open-Vocabulary Audio-Visual Event Localization: Jinxing Zhou,

Dan Guo,

Ruohao Guo,

Yuxin Mao,

Jingjing Hu,

Yiran Zhong,

Xiaojun Chang,

Meng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Jinxing and Guo, Dan and Guo, Ruohao and Mao, Yuxin and Hu, Jingjing and Zhong, Yiran and Chang, Xiaojun and Wang, Meng},
  title     = {Towards Open-Vocabulary Audio-Visual Event Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8362--8371},
}
S2Gaussian: Sparse-View Super-Resolution 3D Gaussian Splatting: Yecong Wan,

Mingwen Shao,

Yuanshuo Cheng,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wan_2025_CVPR,
  author    = {Wan, Yecong and Shao, Mingwen and Cheng, Yuanshuo and Zuo, Wangmeng},
  title     = {{S2Gaussian}: Sparse-View Super-Resolution {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {711--721},
}
HIIF: Hierarchical Encoding based Implicit Image Function for Continuous Super-resolution: Yuxuan Jiang,

Ho Man Kwan,

Tianhao Peng,

Ge Gao,

Fan Zhang,

Xiaoqing Zhu,

Joel Sole,

David Bull; [pdf] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Yuxuan and Kwan, Ho Man and Peng, Tianhao and Gao, Ge and Zhang, Fan and Zhu, Xiaoqing and Sole, Joel and Bull, David},
  title     = {{HIIF}: Hierarchical Encoding based Implicit Image Function for Continuous Super-resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2289--2299},
}
Motion Prompting: Controlling Video Generation with Motion Trajectories: Daniel Geng,

Charles Herrmann,

Junhwa Hur,

Forrester Cole,

Serena Zhang,

Tobias Pfaff,

Tatiana Lopez-Guevara,

Yusuf Aytar,

Michael Rubinstein,

Chen Sun,

Oliver Wang,

Andrew Owens,

Deqing Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Geng_2025_CVPR,
  author    = {Geng, Daniel and Herrmann, Charles and Hur, Junhwa and Cole, Forrester and Zhang, Serena and Pfaff, Tobias and Lopez-Guevara, Tatiana and Aytar, Yusuf and Rubinstein, Michael and Sun, Chen and Wang, Oliver and Owens, Andrew and Sun, Deqing},
  title     = {Motion Prompting: Controlling Video Generation with Motion Trajectories},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1--12},
}
VERA: Explainable Video Anomaly Detection via Verbalized Learning of Vision-Language Models: Muchao Ye,

Weiyang Liu,

Pan He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Muchao and Liu, Weiyang and He, Pan},
  title     = {{VERA}: Explainable Video Anomaly Detection via Verbalized Learning of Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8679--8688},
}
CCIN: Compositional Conflict Identification and Neutralization for Composed Image Retrieval: Likai Tian,

Jian Zhao,

Zechao Hu,

Zhengwei Yang,

Hao Li,

Lei Jin,

Zheng Wang,

Xuelong Li; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Likai and Zhao, Jian and Hu, Zechao and Yang, Zhengwei and Li, Hao and Jin, Lei and Wang, Zheng and Li, Xuelong},
  title     = {{CCIN}: Compositional Conflict Identification and Neutralization for Composed Image Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3974--3983},
}
OverLoCK: An Overview-first-Look-Closely-next ConvNet with Context-Mixing Dynamic Kernels: Meng Lou,

Yizhou Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lou_2025_CVPR,
  author    = {Lou, Meng and Yu, Yizhou},
  title     = {{OverLoCK}: An Overview-first-Look-Closely-next {ConvNet} with Context-Mixing Dynamic Kernels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {128--138},
}
Flash3D: Super-scaling Point Transformers through Joint Hardware-Geometry Locality: Liyan Chen,

Gregory P. Meyer,

Zaiwei Zhang,

Eric M. Wolff,

Paul Vernaza; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Liyan and Meyer, Gregory P. and Zhang, Zaiwei and Wolff, Eric M. and Vernaza, Paul},
  title     = {{Flash3D}: Super-scaling Point Transformers through Joint Hardware-Geometry Locality},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6595--6604},
}
Comprehensive Relighting: Generalizable and Consistent Monocular Human Relighting and Harmonization: Junying Wang,

Jingyuan Liu,

Xin Sun,

Krishna Kumar Singh,

Zhixin Shu,

He Zhang,

Jimei Yang,

Nanxuan Zhao,

Tuanfeng Y. Wang,

Simon S. Chen,

Ulrich Neumann,

Jae Shin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Junying and Liu, Jingyuan and Sun, Xin and Singh, Krishna Kumar and Shu, Zhixin and Zhang, He and Yang, Jimei and Zhao, Nanxuan and Wang, Tuanfeng Y. and Chen, Simon S. and Neumann, Ulrich and Yoon, Jae Shin},
  title     = {Comprehensive Relighting: Generalizable and Consistent Monocular Human Relighting and Harmonization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {380--390},
}
MAtCha Gaussians: Atlas of Charts for High-Quality Geometry and Photorealism From Sparse Views: Antoine Guedon,

Tomoki Ichikawa,

Kohei Yamashita,

Ko Nishino; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guedon_2025_CVPR,
  author    = {Guedon, Antoine and Ichikawa, Tomoki and Yamashita, Kohei and Nishino, Ko},
  title     = {{MAtCha} {Gaussians}: Atlas of Charts for High-Quality Geometry and Photorealism From Sparse Views},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6001--6011},
}
Extreme Rotation Estimation in the Wild: Hana Bezalel,

Dotan Ankri,

Ruojin Cai,

Hadar Averbach-Elor; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bezalel_2025_CVPR,
  author    = {Bezalel, Hana and Ankri, Dotan and Cai, Ruojin and Averbach-Elor, Hadar},
  title     = {Extreme Rotation Estimation in the Wild},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1061--1070},
}
Traversing Distortion-Perception Tradeoff using a Single Score-Based Generative Model: Yuhan Wang,

Suzhi Bi,

Ying-Jun Angela Zhang,

Xiaojun Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuhan and Bi, Suzhi and Zhang, Ying-Jun Angela and Yuan, Xiaojun},
  title     = {Traversing Distortion-Perception Tradeoff using a Single Score-Based Generative Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2377--2386},
}
Task-Agnostic Guided Feature Expansion for Class-Incremental Learning: Bowen Zheng,

Da-Wei Zhou,

Han-Jia Ye,

De-Chuan Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Bowen and Zhou, Da-Wei and Ye, Han-Jia and Zhan, De-Chuan},
  title     = {Task-Agnostic Guided Feature Expansion for Class-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10099--10109},
}
Let's Chorus: Partner-aware Hybrid Song-Driven 3D Head Animation: Xiumei Xie,

Zikai Huang,

Wenhao Xu,

Peng Xiao,

Xuemiao Xu,

Huaidong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Xiumei and Huang, Zikai and Xu, Wenhao and Xiao, Peng and Xu, Xuemiao and Zhang, Huaidong},
  title     = {Let's Chorus: Partner-aware Hybrid Song-Driven {3D} Head Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5467--5476},
}
Twinner: Shining Light on Digital Twins in a Few Snaps: Jesus Zarzar,

Tom Monnier,

Roman Shapovalov,

Andrea Vedaldi,

David Novotny; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zarzar_2025_CVPR,
  author    = {Zarzar, Jesus and Monnier, Tom and Shapovalov, Roman and Vedaldi, Andrea and Novotny, David},
  title     = {Twinner: Shining Light on Digital Twins in a Few Snaps},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5859--5869},
}
MonoPlace3D: Learning 3D-Aware Object Placement for 3D Monocular Detection: Rishubh Parihar,

Srinjay Sarkar,

Sarthak Vora,

Jogendra Nath Kundu,

R. Venkatesh Babu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parihar_2025_CVPR,
  author    = {Parihar, Rishubh and Sarkar, Srinjay and Vora, Sarthak and Kundu, Jogendra Nath and Babu, R. Venkatesh},
  title     = {{MonoPlace3D}: Learning {3D}-Aware Object Placement for {3D} Monocular Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6531--6541},
}
SGCR: Spherical Gaussians for Efficient 3D Curve Reconstruction: Xinran Yang,

Donghao Ji,

Yuanqi Li,

Jie Guo,

Yanwen Guo,

Junyuan Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Xinran and Ji, Donghao and Li, Yuanqi and Guo, Jie and Guo, Yanwen and Xie, Junyuan},
  title     = {{SGCR}: Spherical {Gaussians} for Efficient {3D} Curve Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5793--5803},
}
Advancing Semantic Future Prediction through Multimodal Visual Sequence Transformers: Efstathios Karypidis,

Ioannis Kakogeorgiou,

Spyros Gidaris,

Nikos Komodakis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karypidis_2025_CVPR,
  author    = {Karypidis, Efstathios and Kakogeorgiou, Ioannis and Gidaris, Spyros and Komodakis, Nikos},
  title     = {Advancing Semantic Future Prediction through Multimodal Visual Sequence Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3793--3803},
}
On the Out-Of-Distribution Generalization of Large Multimodal Models: Xingxuan Zhang,

Jiansheng Li,

Wenjing Chu,

Junjia Hai,

Renzhe Xu,

Yuqing Yang,

Shikai Guan,

Jiazheng Xu,

Liping Jing,

Peng Cui; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xingxuan and Li, Jiansheng and Chu, Wenjing and Hai, Junjia and Xu, Renzhe and Yang, Yuqing and Guan, Shikai and Xu, Jiazheng and Jing, Liping and Cui, Peng},
  title     = {On the Out-Of-Distribution Generalization of Large Multimodal Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10315--10326},
}
Enhanced Contrastive Learning with Multi-view Longitudinal Data for Chest X-ray Report Generation: Kang Liu,

Zhuoqi Ma,

Xiaolu Kang,

Yunan Li,

Kun Xie,

Zhicheng Jiao,

Qiguang Miao; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Kang and Ma, Zhuoqi and Kang, Xiaolu and Li, Yunan and Xie, Kun and Jiao, Zhicheng and Miao, Qiguang},
  title     = {Enhanced Contrastive Learning with Multi-view Longitudinal Data for Chest {X-ray} Report Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10348--10359},
}
Scaling Inference Time Compute for Diffusion Models: Nanye Ma,

Shangyuan Tong,

Haolin Jia,

Hexiang Hu,

Yu-Chuan Su,

Mingda Zhang,

Xuan Yang,

Yandong Li,

Tommi Jaakkola,

Xuhui Jia,

Saining Xie; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Nanye and Tong, Shangyuan and Jia, Haolin and Hu, Hexiang and Su, Yu-Chuan and Zhang, Mingda and Yang, Xuan and Li, Yandong and Jaakkola, Tommi and Jia, Xuhui and Xie, Saining},
  title     = {Scaling Inference Time Compute for Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2523--2534},
}
Chat-based Person Retrieval via Dialogue-Refined Cross-Modal Alignment: Yang Bai,

Yucheng Ji,

Min Cao,

Jinqiao Wang,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2025_CVPR,
  author    = {Bai, Yang and Ji, Yucheng and Cao, Min and Wang, Jinqiao and Ye, Mang},
  title     = {Chat-based Person Retrieval via Dialogue-Refined Cross-Modal Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3952--3962},
}
AVF-MAE++: Scaling Affective Video Facial Masked Autoencoders via Efficient Audio-Visual Self-Supervised Learning: Xuecheng Wu,

Heli Sun,

Yifan Wang,

Jiayu Nie,

Jie Zhang,

Yabing Wang,

Junxiao Xue,

Liang He; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Xuecheng and Sun, Heli and Wang, Yifan and Nie, Jiayu and Zhang, Jie and Wang, Yabing and Xue, Junxiao and He, Liang},
  title     = {{AVF-MAE++}: Scaling Affective Video Facial Masked Autoencoders via Efficient Audio-Visual Self-Supervised Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9142--9153},
}
DiffusionSfM: Predicting Structure and Motion via Ray Origin and Endpoint Diffusion: Qitao Zhao,

Amy Lin,

Jeff Tan,

Jason Y. Zhang,

Deva Ramanan,

Shubham Tulsiani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Qitao and Lin, Amy and Tan, Jeff and Zhang, Jason Y. and Ramanan, Deva and Tulsiani, Shubham},
  title     = {{DiffusionSfM}: Predicting Structure and Motion via Ray Origin and Endpoint Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6317--6326},
}
GroundingFace: Fine-grained Face Understanding via Pixel Grounding Multimodal Large Language Model: Yue Han,

Jiangning Zhang,

Junwei Zhu,

Runze Hou,

Xiaozhong Ji,

Chuming Lin,

Xiaobin Hu,

Zhucun Xue,

Yong Liu; [pdf]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Yue and Zhang, Jiangning and Zhu, Junwei and Hou, Runze and Ji, Xiaozhong and Lin, Chuming and Hu, Xiaobin and Xue, Zhucun and Liu, Yong},
  title     = {{GroundingFace}: Fine-grained Face Understanding via Pixel Grounding Multimodal Large Language Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3942--3951},
}
DynFocus: Dynamic Cooperative Network Empowers LLMs with Video Understanding: Yudong Han,

Qingpei Guo,

Liyuan Pan,

Liu Liu,

Yu Guan,

Ming Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Yudong and Guo, Qingpei and Pan, Liyuan and Liu, Liu and Guan, Yu and Yang, Ming},
  title     = {{DynFocus}: Dynamic Cooperative Network Empowers {LLMs} with Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8512--8522},
}
V-Stylist: Video Stylization via Collaboration and Reflection of MLLM Agents: Zhengrong Yue,

Shaobin Zhuang,

Kunchang Li,

Yanbo Ding,

Yali Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yue_2025_CVPR,
  author    = {Yue, Zhengrong and Zhuang, Shaobin and Li, Kunchang and Ding, Yanbo and Wang, Yali},
  title     = {{V-Stylist}: Video Stylization via Collaboration and Reflection of {MLLM} Agents},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3195--3205},
}
ID-Patch: Robust ID Association for Group Photo Personalization: Yimeng Zhang,

Tiancheng Zhi,

Jing Liu,

Shen Sang,

Liming Jiang,

Qing Yan,

Sijia Liu,

Linjie Luo; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yimeng and Zhi, Tiancheng and Liu, Jing and Sang, Shen and Jiang, Liming and Yan, Qing and Liu, Sijia and Luo, Linjie},
  title     = {{ID-Patch}: Robust {ID} Association for Group Photo Personalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2986--2996},
}
iG-6DoF: Model-free 6DoF Pose Estimation for Unseen Object via Iterative 3D Gaussian Splatting: Tuo Cao,

Fei Luo,

Jiongming Qin,

Yu Jiang,

Yusen Wang,

Chunxia Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Tuo and Luo, Fei and Qin, Jiongming and Jiang, Yu and Wang, Yusen and Xiao, Chunxia},
  title     = {{iG-6DoF}: Model-free {6DoF} Pose Estimation for Unseen Object via Iterative {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6436--6446},
}
ForestLPR: LiDAR Place Recognition in Forests Attentioning Multiple BEV Density Images: Yanqing Shen,

Turcan Tuna,

Marco Hutter,

Cesar Cadena,

Nanning Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Yanqing and Tuna, Turcan and Hutter, Marco and Cadena, Cesar and Zheng, Nanning},
  title     = {{ForestLPR}: {LiDAR} Place Recognition in Forests Attentioning Multiple {BEV} Density Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6659--6669},
}
AdaMMS: Model Merging for Heterogeneous Multimodal Large Language Models with Unsupervised Coefficient Optimization: Yiyang Du,

Xiaochen Wang,

Chi Chen,

Jiabo Ye,

Yiru Wang,

Peng Li,

Ming Yan,

Ji Zhang,

Fei Huang,

Zhifang Sui,

Maosong Sun,

Yang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2025_CVPR,
  author    = {Du, Yiyang and Wang, Xiaochen and Chen, Chi and Ye, Jiabo and Wang, Yiru and Li, Peng and Yan, Ming and Zhang, Ji and Huang, Fei and Sui, Zhifang and Sun, Maosong and Liu, Yang},
  title     = {{AdaMMS}: Model Merging for Heterogeneous Multimodal Large Language Models with Unsupervised Coefficient Optimization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9413--9422},
}
SLVR: Super-Light Visual Reconstruction via Blueprint Controllable Convolutions and Exploring Feature Diversity Representation: Ning Ni,

Libao Zhang; [pdf]
[bibtex]
@InProceedings{Ni_2025_CVPR,
  author    = {Ni, Ning and Zhang, Libao},
  title     = {{SLVR}: Super-Light Visual Reconstruction via Blueprint Controllable Convolutions and Exploring Feature Diversity Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {400--410},
}
Layered Image Vectorization via Semantic Simplification: Zhenyu Wang,

Jianxi Huang,

Zhida Sun,

Yuanhao Gong,

Daniel Cohen-Or,

Min Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhenyu and Huang, Jianxi and Sun, Zhida and Gong, Yuanhao and Cohen-Or, Daniel and Lu, Min},
  title     = {Layered Image Vectorization via Semantic Simplification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7728--7738},
}
Hearing Anywhere in Any Environment: Xiulong Liu,

Anurag Kumar,

Paul Calamia,

Sebastia V. Amengual,

Calvin Murdock,

Ishwarya Ananthabhotla,

Philip Robinson,

Eli Shlizerman,

Vamsi Krishna Ithapu,

Ruohan Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xiulong and Kumar, Anurag and Calamia, Paul and Amengual, Sebastia V. and Murdock, Calvin and Ananthabhotla, Ishwarya and Robinson, Philip and Shlizerman, Eli and Ithapu, Vamsi Krishna and Gao, Ruohan},
  title     = {Hearing Anywhere in Any Environment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5732--5741},
}
Automated Proof of Polynomial Inequalities via Reinforcement Learning: Banglong Liu,

Niuniu Qi,

Xia Zeng,

Lydia Dehbi,

Zhengfeng Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Banglong and Qi, Niuniu and Zeng, Xia and Dehbi, Lydia and Yang, Zhengfeng},
  title     = {Automated Proof of Polynomial Inequalities via Reinforcement Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5052--5060},
}
SAM-I2V: Upgrading SAM to Support Promptable Video Segmentation with Less than 0.2% Training Cost: Haiyang Mei,

Pengyu Zhang,

Mike Zheng Shou; [pdf] [supp]
[bibtex]
@InProceedings{Mei_2025_CVPR,
  author    = {Mei, Haiyang and Zhang, Pengyu and Shou, Mike Zheng},
  title     = {{SAM-I2V}: Upgrading {SAM} to Support Promptable Video Segmentation with Less than 0.2\% Training Cost},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3417--3426},
}
How Do I Do That? Synthesizing 3D Hand Motion and Contacts for Everyday Interactions: Aditya Prakash,

Benjamin Lundell,

Dmitry Andreychuk,

David Forsyth,

Saurabh Gupta,

Harpreet Sawhney; [pdf] [arXiv]
[bibtex]
@InProceedings{Prakash_2025_CVPR,
  author    = {Prakash, Aditya and Lundell, Benjamin and Andreychuk, Dmitry and Forsyth, David and Gupta, Saurabh and Sawhney, Harpreet},
  title     = {How Do {I} Do That? {Synthesizing} {3D} Hand Motion and Contacts for Everyday Interactions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7026--7036},
}
Joint Vision-Language Social Bias Removal for CLIP: Haoyu Zhang,

Yangyang Guo,

Mohan Kankanhalli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Haoyu and Guo, Yangyang and Kankanhalli, Mohan},
  title     = {Joint Vision-Language Social Bias Removal for {CLIP}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4246--4255},
}
MV-DUSt3R+: Single-Stage Scene Reconstruction from Sparse Views In 2 Seconds: Zhenggang Tang,

Yuchen Fan,

Dilin Wang,

Hongyu Xu,

Rakesh Ranjan,

Alexander Schwing,

Zhicheng Yan; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Zhenggang and Fan, Yuchen and Wang, Dilin and Xu, Hongyu and Ranjan, Rakesh and Schwing, Alexander and Yan, Zhicheng},
  title     = {{MV-DUSt3R+}: Single-Stage Scene Reconstruction from Sparse Views In 2 Seconds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5283--5293},
}
Explicit Depth-Aware Blurry Video Frame Interpolation Guided by Differential Curves: Zaoming Yan,

Pengcheng Lei,

Tingting Wang,

Faming Fang,

Junkang Zhang,

Yaomin Huang,

Haichuan Song; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Zaoming and Lei, Pengcheng and Wang, Tingting and Fang, Faming and Zhang, Junkang and Huang, Yaomin and Song, Haichuan},
  title     = {Explicit Depth-Aware Blurry Video Frame Interpolation Guided by Differential Curves},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1994--2004},
}
MonSter: Marry Monodepth to Stereo Unleashes Power: Junda Cheng,

Longliang Liu,

Gangwei Xu,

Xianqi Wang,

Zhaoxing Zhang,

Yong Deng,

Jinliang Zang,

Yurui Chen,

Zhipeng Cai,

Xin Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR, author = {Cheng, Junda and Liu, Longliang and Xu, Gangwei and Wang, Xianqi and Zhang, Zhaoxing and Deng, Yong and Zang, Jinliang and Chen, Yurui and Cai, Zhipeng and Yang, Xin}, title = {{MonSter}: Marry Monodepth to Stereo Unleashes Power}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6273--6282} }
A Tale of Two Classes: Adapting Supervised Contrastive Learning to Binary Imbalanced Datasets: David Mildenberger,

Paul Hager,

Daniel Rueckert,

Martin J. Menten; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mildenberger_2025_CVPR, author = {Mildenberger, David and Hager, Paul and Rueckert, Daniel and Menten, Martin J.}, title = {A Tale of Two Classes: Adapting Supervised Contrastive Learning to Binary Imbalanced Datasets}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10305--10314} }
Learning Class Prototypes for Unified Sparse-Supervised 3D Object Detection: Yun Zhu,

Le Hui,

Hang Yang,

Jianjun Qian,

Jin Xie,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR, author = {Zhu, Yun and Hui, Le and Yang, Hang and Qian, Jianjun and Xie, Jin and Yang, Jian}, title = {Learning Class Prototypes for Unified Sparse-Supervised {3D} Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9911--9920} }
Open-World Amodal Appearance Completion: Jiayang Ao,

Yanbei Jiang,

Qiuhong Ke,

Krista A. Ehinger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ao_2025_CVPR, author = {Ao, Jiayang and Jiang, Yanbei and Ke, Qiuhong and Ehinger, Krista A.}, title = {Open-World Amodal Appearance Completion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6490--6499} }
RivuletMLP: An MLP-based Architecture for Efficient Compressed Video Quality Enhancement: Gang He,

Weiran Wang,

Guancheng Quan,

Shihao Wang,

Dajiang Zhou,

Yunsong Li; [pdf]
[bibtex]
@InProceedings{He_2025_CVPR, author = {He, Gang and Wang, Weiran and Quan, Guancheng and Wang, Shihao and Zhou, Dajiang and Li, Yunsong}, title = {{RivuletMLP}: An {MLP}-based Architecture for Efficient Compressed Video Quality Enhancement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7342--7352} }
Reanimating Images using Neural Representations of Dynamic Stimuli: Jacob Yeung,

Andrew F. Luo,

Gabriel Sarch,

Margaret M. Henderson,

Deva Ramanan,

Michael J. Tarr; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yeung_2025_CVPR, author = {Yeung, Jacob and Luo, Andrew F. and Sarch, Gabriel and Henderson, Margaret M. and Ramanan, Deva and Tarr, Michael J.}, title = {Reanimating Images using Neural Representations of Dynamic Stimuli}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5331--5343} }
DepthCrafter: Generating Consistent Long Depth Sequences for Open-world Videos: Wenbo Hu,

Xiangjun Gao,

Xiaoyu Li,

Sijie Zhao,

Xiaodong Cun,

Yong Zhang,

Long Quan,

Ying Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR, author = {Hu, Wenbo and Gao, Xiangjun and Li, Xiaoyu and Zhao, Sijie and Cun, Xiaodong and Zhang, Yong and Quan, Long and Shan, Ying}, title = {{DepthCrafter}: Generating Consistent Long Depth Sequences for Open-world Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2005--2015} }
Rethinking Vision-Language Model in Face Forensics: Multi-Modal Interpretable Forged Face Detector: Xiao Guo,

Xiufeng Song,

Yue Zhang,

Xiaohong Liu,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR, author = {Guo, Xiao and Song, Xiufeng and Zhang, Yue and Liu, Xiaohong and Liu, Xiaoming}, title = {Rethinking Vision-Language Model in Face Forensics: Multi-Modal Interpretable Forged Face Detector}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {105--116} }
Active Hyperspectral Imaging Using an Event Camera: Bohan Yu,

Jinxiu Liang,

Zhuofeng Wang,

Bin Fan,

Art Subpa-asa,

Boxin Shi,

Imari Sato; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR, author = {Yu, Bohan and Liang, Jinxiu and Wang, Zhuofeng and Fan, Bin and Subpa-asa, Art and Shi, Boxin and Sato, Imari}, title = {Active Hyperspectral Imaging Using an Event Camera}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {929--939} }
Bridging the Gap between Gaussian Diffusion Models and Universal Quantization for Image Compression: Lucas Relic,

Roberto Azevedo,

Yang Zhang,

Markus Gross,

Christopher Schroers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Relic_2025_CVPR, author = {Relic, Lucas and Azevedo, Roberto and Zhang, Yang and Gross, Markus and Schroers, Christopher}, title = {Bridging the Gap between {Gaussian} Diffusion Models and Universal Quantization for Image Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2449--2458} }
SAMBLE: Shape-Specific Point Cloud Sampling for an Optimal Trade-Off Between Local Detail and Global Uniformity: Chengzhi Wu,

Yuxin Wan,

Hao Fu,

Julius Pfrommer,

Zeyun Zhong,

Junwei Zheng,

Jiaming Zhang,

Jürgen Beyerer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR, author = {Wu, Chengzhi and Wan, Yuxin and Fu, Hao and Pfrommer, Julius and Zhong, Zeyun and Zheng, Junwei and Zhang, Jiaming and Beyerer, J{\"u}rgen}, title = {{SAMBLE}: Shape-Specific Point Cloud Sampling for an Optimal Trade-Off Between Local Detail and Global Uniformity}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1342--1352} }
Driving by the Rules: A Benchmark for Integrating Traffic Sign Regulations into Vectorized HD Map: Xinyuan Chang,

Maixuan Xue,

Xinran Liu,

Zheng Pan,

Xing Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chang_2025_CVPR, author = {Chang, Xinyuan and Xue, Maixuan and Liu, Xinran and Pan, Zheng and Wei, Xing}, title = {Driving by the Rules: A Benchmark for Integrating Traffic Sign Regulations into Vectorized {HD} Map}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6823--6833} }
S4-Driver: Scalable Self-Supervised Driving Multimodal Large Language Model with Spatio-Temporal Visual Representation: Yichen Xie,

Runsheng Xu,

Tong He,

Jyh-Jing Hwang,

Katie Luo,

Jingwei Ji,

Hubert Lin,

Letian Chen,

Yiren Lu,

Zhaoqi Leng,

Dragomir Anguelov,

Mingxing Tan; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_CVPR, author = {Xie, Yichen and Xu, Runsheng and He, Tong and Hwang, Jyh-Jing and Luo, Katie and Ji, Jingwei and Lin, Hubert and Chen, Letian and Lu, Yiren and Leng, Zhaoqi and Anguelov, Dragomir and Tan, Mingxing}, title = {{S4-Driver}: Scalable Self-Supervised Driving Multimodal Large Language Model with Spatio-Temporal Visual Representation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1622--1632} }
Science-T2I: Addressing Scientific Illusions in Image Synthesis: Jialuo Li,

Wenhao Chai,

Xingyu Fu,

Haiyang Xu,

Saining Xie; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Jialuo and Chai, Wenhao and Fu, Xingyu and Xu, Haiyang and Xie, Saining}, title = {{Science-T2I}: Addressing Scientific Illusions in Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2734--2744} }
MoST: Efficient Monarch Sparse Tuning for 3D Representation Learning: Xu Han,

Yuan Tang,

Jinfeng Xu,

Xianzhi Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR, author = {Han, Xu and Tang, Yuan and Xu, Jinfeng and Li, Xianzhi}, title = {{MoST}: Efficient {Monarch} Sparse Tuning for {3D} Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6584--6594} }
Re-thinking Temporal Search for Long-Form Video Understanding: Jinhui Ye,

Zihan Wang,

Haosen Sun,

Keshigeyan Chandrasegaran,

Zane Durante,

Cristobal Eyzaguirre,

Yonatan Bisk,

Juan Carlos Niebles,

Ehsan Adeli,

Li Fei-Fei,

Jiajun Wu,

Manling Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR, author = {Ye, Jinhui and Wang, Zihan and Sun, Haosen and Chandrasegaran, Keshigeyan and Durante, Zane and Eyzaguirre, Cristobal and Bisk, Yonatan and Niebles, Juan Carlos and Adeli, Ehsan and Fei-Fei, Li and Wu, Jiajun and Li, Manling}, title = {Re-thinking Temporal Search for Long-Form Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8579--8591} }
When Domain Generalization meets Generalized Category Discovery: An Adaptive Task-Arithmetic Driven Approach: Vaibhav Rathore,

Shubhranil B,

Saikat Dutta,

Sarthak Mehrotra,

Zsolt Kira,

Biplab Banerjee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rathore_2025_CVPR, author = {Rathore, Vaibhav and B, Shubhranil and Dutta, Saikat and Mehrotra, Sarthak and Kira, Zsolt and Banerjee, Biplab}, title = {When Domain Generalization meets Generalized Category Discovery: An Adaptive Task-Arithmetic Driven Approach}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4905--4915} }
BIP3D: Bridging 2D Images and 3D Perception for Embodied Intelligence: Xuewu Lin,

Tianwei Lin,

Lichao Huang,

Hongyu Xie,

Zhizhong Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR, author = {Lin, Xuewu and Lin, Tianwei and Huang, Lichao and Xie, Hongyu and Su, Zhizhong}, title = {{BIP3D}: Bridging {2D} Images and {3D} Perception for Embodied Intelligence}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9007--9016} }
Query Efficient Black-Box Visual Prompting with Subspace Learning: Zhaogeng Liu,

Haozhen Zhang,

Hualin Zhang,

Xingchen Li,

Wanli Shi,

Bin Gu,

Yi Chang; [pdf]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Zhaogeng and Zhang, Haozhen and Zhang, Hualin and Li, Xingchen and Shi, Wanli and Gu, Bin and Chang, Yi}, title = {Query Efficient Black-Box Visual Prompting with Subspace Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4322--4331} }
Improving Autoregressive Visual Generation with Cluster-Oriented Token Prediction: Teng Hu,

Jiangning Zhang,

Ran Yi,

Jieyu Weng,

Yabiao Wang,

Xianfang Zeng,

Zhucun Xue,

Lizhuang Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR, author = {Hu, Teng and Zhang, Jiangning and Yi, Ran and Weng, Jieyu and Wang, Yabiao and Zeng, Xianfang and Xue, Zhucun and Ma, Lizhuang}, title = {Improving Autoregressive Visual Generation with Cluster-Oriented Token Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9351--9360} }
Dual-view X-ray Detection: Can AI Detect Prohibited Items from Dual-view X-ray Images like Humans?: Renshuai Tao,

Haoyu Wang,

Yuzhe Guo,

Hairong Chen,

Li Zhang,

Xianglong Liu,

Yunchao Wei,

Yao Zhao; [pdf]
[bibtex]
@InProceedings{Tao_2025_CVPR, author = {Tao, Renshuai and Wang, Haoyu and Guo, Yuzhe and Chen, Hairong and Zhang, Li and Liu, Xianglong and Wei, Yunchao and Zhao, Yao}, title = {Dual-view {X-ray} Detection: Can {AI} Detect Prohibited Items from Dual-view {X-ray} Images like Humans?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10338--10347} }
Solving Instance Detection from an Open-World Perspective: Qianqian Shen,

Yunhan Zhao,

Nahyun Kwon,

Jeeeun Kim,

Yanan Li,

Shu Kong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2025_CVPR, author = {Shen, Qianqian and Zhao, Yunhan and Kwon, Nahyun and Kim, Jeeeun and Li, Yanan and Kong, Shu}, title = {Solving Instance Detection from an Open-World Perspective}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9901--9910} }
Percept, Memory, and Imagine: World Feature Simulating for Open-Domain Unknown Object Detection: Aming Wu,

Cheng Deng; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR, author = {Wu, Aming and Deng, Cheng}, title = {Percept, Memory, and Imagine: World Feature Simulating for Open-Domain Unknown Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4682--4691} }
Efficient Depth Estimation for Unstable Stereo Camera Systems on AR Glasses: Yongfan Liu,

Hyoukjun Kwon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Yongfan and Kwon, Hyoukjun}, title = {Efficient Depth Estimation for Unstable Stereo Camera Systems on {AR} Glasses}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6252--6261} }
LiDAR-RT: Gaussian-based Ray Tracing for Dynamic LiDAR Re-simulation: Chenxu Zhou,

Lvchang Fu,

Sida Peng,

Yunzhi Yan,

Zhanhua Zhang,

Yong Chen,

Jiazhi Xia,

Xiaowei Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR, author = {Zhou, Chenxu and Fu, Lvchang and Peng, Sida and Yan, Yunzhi and Zhang, Zhanhua and Chen, Yong and Xia, Jiazhi and Zhou, Xiaowei}, title = {{LiDAR-RT}: {Gaussian}-based Ray Tracing for Dynamic {LiDAR} Re-simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1538--1548} }
Large-Scale Text-to-Image Model with Inpainting is a Zero-Shot Subject-Driven Image Generator: Chaehun Shin,

Jooyoung Choi,

Heeseung Kim,

Sungroh Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shin_2025_CVPR, author = {Shin, Chaehun and Choi, Jooyoung and Kim, Heeseung and Yoon, Sungroh}, title = {Large-Scale Text-to-Image Model with Inpainting is a Zero-Shot Subject-Driven Image Generator}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7986--7996} }
Flow-NeRF: Joint Learning of Geometry, Poses, and Dense Flow within Unified Neural Representations: Xunzhi Zheng,

Dan Xu; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_CVPR, author = {Zheng, Xunzhi and Xu, Dan}, title = {{Flow-NeRF}: Joint Learning of Geometry, Poses, and Dense Flow within Unified Neural Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {993--1002} }
Consistent and Controllable Image Animation with Motion Diffusion Models: Xin Ma,

Yaohui Wang,

Gengyun Jia,

Xinyuan Chen,

Tien-Tsin Wong,

Yuan-Fang Li,

Cunjian Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR, author = {Ma, Xin and Wang, Yaohui and Jia, Gengyun and Chen, Xinyuan and Wong, Tien-Tsin and Li, Yuan-Fang and Chen, Cunjian}, title = {Consistent and Controllable Image Animation with Motion Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7288--7298} }
AA-CLIP: Enhancing Zero-Shot Anomaly Detection via Anomaly-Aware CLIP: Wenxin Ma,

Xu Zhang,

Qingsong Yao,

Fenghe Tang,

Chenxu Wu,

Yingtai Li,

Rui Yan,

Zihang Jiang,

S. Kevin Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2025_CVPR, author = {Ma, Wenxin and Zhang, Xu and Yao, Qingsong and Tang, Fenghe and Wu, Chenxu and Li, Yingtai and Yan, Rui and Jiang, Zihang and Zhou, S. Kevin}, title = {{AA-CLIP}: Enhancing Zero-Shot Anomaly Detection via Anomaly-Aware {CLIP}}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4744--4754} }
HybridGS: Decoupling Transients and Statics with 2D and 3D Gaussian Splatting: Jingyu Lin,

Jiaqi Gu,

Lubin Fan,

Bojian Wu,

Yujing Lou,

Renjie Chen,

Ligang Liu,

Jieping Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR, author = {Lin, Jingyu and Gu, Jiaqi and Fan, Lubin and Wu, Bojian and Lou, Yujing and Chen, Renjie and Liu, Ligang and Ye, Jieping}, title = {{HybridGS}: Decoupling Transients and Statics with {2D} and {3D} {Gaussian} Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {788--797} }
Channel Consistency Prior and Self-Reconstruction Strategy Based Unsupervised Image Deraining: Guanglu Dong,

Tianheng Zheng,

Yuanzhouhan Cao,

Linbo Qing,

Chao Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_CVPR, author = {Dong, Guanglu and Zheng, Tianheng and Cao, Yuanzhouhan and Qing, Linbo and Ren, Chao}, title = {Channel Consistency Prior and Self-Reconstruction Strategy Based Unsupervised Image Deraining}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7469--7479} }
MobileMamba: Lightweight Multi-Receptive Visual Mamba Network: Haoyang He,

Jiangning Zhang,

Yuxuan Cai,

Hongxu Chen,

Xiaobin Hu,

Zhenye Gan,

Yabiao Wang,

Chengjie Wang,

Yunsheng Wu,

Lei Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_CVPR, author = {He, Haoyang and Zhang, Jiangning and Cai, Yuxuan and Chen, Hongxu and Hu, Xiaobin and Gan, Zhenye and Wang, Yabiao and Wang, Chengjie and Wu, Yunsheng and Xie, Lei}, title = {{MobileMamba}: Lightweight Multi-Receptive Visual {Mamba} Network}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4497--4507} }
SimLTD: Simple Supervised and Semi-Supervised Long-Tailed Object Detection: Phi Vu Tran; [pdf] [arXiv]
[bibtex]
@InProceedings{Tran_2025_CVPR, author = {Tran, Phi Vu}, title = {{SimLTD}: Simple Supervised and Semi-Supervised Long-Tailed Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4672--4681} }
HyperSeg: Hybrid Segmentation Assistant with Fine-grained Visual Perceiver: Cong Wei,

Yujie Zhong,

Haoxian Tan,

Yong Liu,

Jie Hu,

Dengjie Li,

Zheng Zhao,

Yujiu Yang; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_CVPR, author = {Wei, Cong and Zhong, Yujie and Tan, Haoxian and Liu, Yong and Hu, Jie and Li, Dengjie and Zhao, Zheng and Yang, Yujiu}, title = {{HyperSeg}: Hybrid Segmentation Assistant with Fine-grained Visual Perceiver}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8931--8941} }
Diffusion-based Event Generation for High-Quality Image Deblurring: Xinan Xie,

Qing Zhang,

Wei-Shi Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_CVPR, author = {Xie, Xinan and Zhang, Qing and Zheng, Wei-Shi}, title = {Diffusion-based Event Generation for High-Quality Image Deblurring}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2194--2203} }
Balanced Rate-Distortion Optimization in Learned Image Compression: Yichi Zhang,

Zhihao Duan,

Yuning Huang,

Fengqing Zhu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yichi and Duan, Zhihao and Huang, Yuning and Zhu, Fengqing}, title = {Balanced Rate-Distortion Optimization in Learned Image Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2428--2438} }
Bridge the Gap: From Weak to Full Supervision for Temporal Action Localization with PseudoFormer: Ziyi Liu,

Yangcen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Ziyi and Liu, Yangcen}, title = {Bridge the Gap: From Weak to Full Supervision for Temporal Action Localization with {PseudoFormer}}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8711--8720} }; Back