Papers
- Back
Towards Source-Free Machine Unlearning-
[pdf]
[supp]
[bibtex]@InProceedings{Ahmed_2025_CVPR,
    author    = {Ahmed, Sk Miraj and Basaran, Umit Yigit and Raychaudhuri, Dripta S. and Dutta, Arindam and Kundu, Rohit and Niloy, Fahim Faisal and Guler, Basak and Roy-Chowdhury, Amit K.},
    title     = {Towards Source-Free Machine Unlearning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {4948--4957}
}
Uni4D: Unifying Visual Foundation Models for 4D Modeling from a Single Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2025_CVPR,
    author    = {Yao, David Yifan and Zhai, Albert J. and Wang, Shenlong},
    title     = {{Uni4D}: Unifying Visual Foundation Models for {4D} Modeling from a Single Video},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {1116--1126}
}
Hyperbolic Category Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Yuanpei and He, Zhenqi and Han, Kai},
    title     = {Hyperbolic Category Discovery},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {9891--9900}
}
The Language of Motion: Unifying Verbal and Non-verbal Language of 3D Human Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Changan and Zhang, Juze and Lakshmikanth, Shrinidhi K. and Fang, Yusu and Shao, Ruizhi and Wetzstein, Gordon and Fei-Fei, Li and Adeli, Ehsan},
    title     = {The Language of Motion: Unifying Verbal and Non-verbal Language of {3D} Human Motion},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {6200--6211}
}
CALICO: Part-Focused Semantic Co-Segmentation with Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2025_CVPR,
    author    = {Nguyen, Kiet A. and Juvekar, Adheesh and Yu, Tianjiao and Wahed, Muntasir and Lourentzou, Ismini},
    title     = {{CALICO}: Part-Focused Semantic Co-Segmentation with Large Vision-Language Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {4550--4561}
}
Words or Vision: Do Vision-Language Models Have Blind Faith in Text?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2025_CVPR,
    author    = {Deng, Ailin and Cao, Tri and Chen, Zhirui and Hooi, Bryan},
    title     = {Words or Vision: Do Vision-Language Models Have Blind Faith in Text?},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {3867--3876}
}
Learning to Detect Objects from Multi-Agent LiDAR Scans without Manual Labels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_CVPR,
    author    = {Xia, Qiming and Lin, Wenkai and Xiang, Haoen and Huang, Xun and Chen, Siheng and Dong, Zhen and Wang, Cheng and Wen, Chenglu},
    title     = {Learning to Detect Objects from Multi-Agent {LiDAR} Scans without Manual Labels},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {1418--1428}
}
DeepLA-Net: Very Deep Local Aggregation Networks for Point Cloud Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2025_CVPR,
    author    = {Zeng, Ziyin and Dong, Mingyue and Zhou, Jian and Qiu, Huan and Dong, Zhen and Luo, Man and Li, Bijun},
    title     = {{DeepLA-Net}: Very Deep Local Aggregation Networks for Point Cloud Analysis},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {1330--1341}
}
Multi-Layer Visual Feature Fusion in Multimodal LLMs: Methods, Analysis, and Best Practices-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR,
    author    = {Lin, Junyan and Chen, Haoran and Fan, Yue and Fan, Yingqi and Jin, Xin and Su, Hui and Fu, Jinlan and Shen, Xiaoyu},
    title     = {Multi-Layer Visual Feature Fusion in Multimodal {LLMs}: Methods, Analysis, and Best Practices},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {4156--4166}
}
APHQ-ViT: Post-Training Quantization with Average Perturbation Hessian Based Reconstruction for Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR,
    author    = {Wu, Zhuguanyu and Zhang, Jiayi and Chen, Jiaxin and Guo, Jinyang and Huang, Di and Wang, Yunhong},
    title     = {{APHQ-ViT}: Post-Training Quantization with Average Perturbation {Hessian} Based Reconstruction for Vision Transformers},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {9686--9695}
}
AdaptCMVC: Robust Adaption to Incremental Views in Continual Multi-view Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Jing and Feng, Songhe and Wickstr{\o}m, Kristoffer Knutsen and Kampffmeyer, Michael C.},
    title     = {{AdaptCMVC}: Robust Adaption to Incremental Views in Continual Multi-view Clustering},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {10285--10294}
}
UA-Pose: Uncertainty-Aware 6D Object Pose Estimation and Online Object Completion with Partial References-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
    author    = {Li, Ming-Feng and Yang, Xin and Wang, Fu-En and Basak, Hritam and Sun, Yuyin and Gayaka, Shreekant and Sun, Min and Kuo, Cheng-Hao},
    title     = {{UA-Pose}: Uncertainty-Aware {6D} Object Pose Estimation and Online Object Completion with Partial References},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {1180--1189}
}
Binarized Mamba-Transformer for Lightweight Quad Bayer HybridEVS Demosaicing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR,
    author    = {Zhou, Shiyang and Zeng, Haijin and Lu, Yunfan and Shao, Tong and Tang, Ke and Chen, Yongyong and Liu, Jie and Su, Jingyong},
    title     = {Binarized {Mamba}-Transformer for Lightweight Quad {Bayer} {HybridEVS} Demosaicing},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {8817--8827}
}
Interpretable Image Classification via Non-parametric Part Prototype Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR,
    author    = {Zhu, Zhijie and Fan, Lei and Pagnucco, Maurice and Song, Yang},
    title     = {Interpretable Image Classification via Non-parametric Part Prototype Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {9762--9771}
}
DAGSM: Disentangled Avatar Generation with GS-enhanced Mesh-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuang_2025_CVPR,
    author    = {Zhuang, Jingyu and Kang, Di and Bao, Linchao and Lin, Liang and Li, Guanbin},
    title     = {{DAGSM}: Disentangled Avatar Generation with {GS}-enhanced Mesh},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {292--303}
}
Estimating Body and Hand Motion in an Ego-sensed World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yi_2025_CVPR,
    author    = {Yi, Brent and Ye, Vickie and Zheng, Maya and Li, Yunqi and M{\"u}ller, Lea and Pavlakos, Georgios and Ma, Yi and Malik, Jitendra and Kanazawa, Angjoo},
    title     = {Estimating Body and Hand Motion in an Ego-sensed World},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {7072--7084}
}
Evaluating Vision-Language Models as Evaluators in Path Planning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Aghzal_2025_CVPR,
    author    = {Aghzal, Mohamed and Yue, Xiang and Plaku, Erion and Yao, Ziyu},
    title     = {Evaluating Vision-Language Models as Evaluators in Path Planning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {6886--6897}
}
Free on the Fly: Enhancing Flexibility in Test-Time Adaptation with Online EM-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2025_CVPR,
    author    = {Dai, Qiyuan and Yang, Sibei},
    title     = {Free on the Fly: Enhancing Flexibility in Test-Time Adaptation with Online {EM}},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {9538--9548}
}
SGC-Net: Stratified Granular Comparison Network for Open-Vocabulary HOI Detection-
[pdf]
[bibtex]@InProceedings{Lin_2025_CVPR,
    author    = {Lin, Xin and Shi, Chong and Yang, Zuopeng and Tang, Haojin and Zhou, Zhili},
    title     = {{SGC-Net}: Stratified Granular Comparison Network for Open-Vocabulary {HOI} Detection},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {4539--4549}
}
Galaxy Walker: Geometry-aware VLMs For Galaxy-scale Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Tianyu and Fu, Xingcheng and Gao, Yisen and Qian, Haodong and Wei, Yuecen and Yan, Kun and Zhou, Haoyi and Li, Jianxin},
    title     = {{Galaxy Walker}: Geometry-aware {VLMs} For Galaxy-scale Understanding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {4112--4121}
}
SnowMaster: Comprehensive Real-world Image Desnowing via MLLM with Multi-Model Feedback Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Lai_2025_CVPR,
    author    = {Lai, Jianyu and Chen, Sixiang and Lin, Yunlong and Ye, Tian and Liu, Yun and Fei, Song and Xing, Zhaohu and Wu, Hongtao and Wang, Weiming and Zhu, Lei},
    title     = {{SnowMaster}: Comprehensive Real-world Image Desnowing via {MLLM} with Multi-Model Feedback Optimization},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {4302--4312}
}
Exploring Timeline Control for Facial Motion Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR,
    author    = {Ma, Yifeng and Qi, Jinwei and Ji, Chaonan and Zhang, Peng and Zhang, Bang and Deng, Zhidong and Bo, Liefeng},
    title     = {Exploring Timeline Control for Facial Motion Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {1940--1950}
}
GAF: Gaussian Avatar Reconstruction from Monocular Videos via Multi-view Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR,
    author    = {Tang, Jiapeng and Davoli, Davide and Kirschstein, Tobias and Schoneveld, Liam and Nie{\ss}ner, Matthias},
    title     = {{GAF}: {Gaussian} Avatar Reconstruction from Monocular Videos via Multi-view Diffusion},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {5546--5558}
}
AI-Face: A Million-Scale Demographically Annotated AI-Generated Face Dataset and Fairness Benchmark-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2025_CVPR,
    author    = {Lin, Li and Santosh, Santosh and Wu, Mingyang and Wang, Xin and Hu, Shu},
    title     = {{AI-Face}: A Million-Scale Demographically Annotated {AI}-Generated Face Dataset and Fairness Benchmark},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {3503--3515}
}
Enhancing Video-LLM Reasoning via Agent-of-Thoughts Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_CVPR,
    author    = {Shi, Yudi and Di, Shangzhe and Chen, Qirui and Xie, Weidi},
    title     = {Enhancing {Video-LLM} Reasoning via Agent-of-Thoughts Distillation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {8523--8533}
}
De^2Gaze: Deformable and Decoupled Representation Learning for 3D Gaze Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2025_CVPR,
    author    = {Xiao, Yunfeng and Bai, Xiaowei and Chen, Baojun and Su, Hao and He, Hao and Xie, Liang and Yin, Erwei},
    title     = {{De{\textasciicircum}2Gaze}: Deformable and Decoupled Representation Learning for {3D} Gaze Estimation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {3091--3100}
}
ReCapture: Generative Video Camera Controls for User-Provided Videos using Masked Video Fine-Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, David Junhao and Paiss, Roni and Zada, Shiran and Karnad, Nikhil and Jacobs, David E. and Pritch, Yael and Mosseri, Inbar and Shou, Mike Zheng and Wadhwa, Neal and Ruiz, Nataniel},
    title     = {{ReCapture}: Generative Video Camera Controls for User-Provided Videos using Masked Video Fine-Tuning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {2050--2062}
}
Self-Expansion of Pre-trained Models with Mixture of Adapters for Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Huiyi and Lu, Haodong and Yao, Lina and Gong, Dong},
    title     = {Self-Expansion of Pre-trained Models with Mixture of Adapters for Continual Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {10087--10098}
}
Brain-Inspired Spiking Neural Networks for Energy-Efficient Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
    author    = {Li, Ziqi and Gao, Tao and An, Yisheng and Chen, Ting and Zhang, Jing and Wen, Yuanbo and Liu, Mengkun and Zhang, Qianxi},
    title     = {Brain-Inspired Spiking Neural Networks for Energy-Efficient Object Detection},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {3552--3562}
}
Medusa: A Multi-Scale High-order Contrastive Dual-Diffusion Approach for Multi-View Clustering-
[pdf]
[bibtex]@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Liang and Xue, Zhe and Li, Yawen and Liang, Meiyu and Wang, Yan and van den Hengel, Anton and Qi, Yuankai},
    title     = {Medusa: A Multi-Scale High-order Contrastive Dual-Diffusion Approach for Multi-View Clustering},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {10295--10304}
}
MambaOut: Do We Really Need Mamba for Vision?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR,
    author    = {Yu, Weihao and Wang, Xinchao},
    title     = {{MambaOut}: Do We Really Need {Mamba} for Vision?},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {4484--4496}
}
Seurat: From Moving Points to Depth-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cho_2025_CVPR,
    author    = {Cho, Seokju and Huang, Jiahui and Kim, Seungryong and Lee, Joon-Young},
    title     = {Seurat: From Moving Points to Depth},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {7211--7221}
}
Img-Diff: Contrastive Data Synthesis for Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Jiao_2025_CVPR,
    author    = {Jiao, Qirui and Chen, Daoyuan and Huang, Yilun and Ding, Bolin and Li, Yaliang and Shen, Ying},
    title     = {{Img-Diff}: Contrastive Data Synthesis for Multimodal Large Language Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {9296--9307}
}
The Devil is in the Prompts: Retrieval-Augmented Prompt Optimization for Text-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR,
    author    = {Gao, Bingjie and Gao, Xinyu and Wu, Xiaoxue and Zhou, Yujie and Qiao, Yu and Niu, Li and Chen, Xinyuan and Wang, Yaohui},
    title     = {The Devil is in the Prompts: Retrieval-Augmented Prompt Optimization for Text-to-Video Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {3173--3183}
}
DnLUT: Ultra-Efficient Color Image Denoising via Channel-Aware Lookup Tables-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR,
    author    = {Yang, Sidi and Huang, Binxiao and Zhang, Yulun and Yu, Dahai and Yang, Yujiu and Wong, Ngai},
    title     = {{DnLUT}: Ultra-Efficient Color Image Denoising via Channel-Aware Lookup Tables},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {7582--7591}
}
BiM-VFI: Bidirectional Motion Field-Guided Frame Interpolation for Video with Non-uniform Motions-
[pdf]
[supp]
[bibtex]@InProceedings{Seo_2025_CVPR,
    author    = {Seo, Wonyong and Oh, Jihyong and Kim, Munchurl},
    title     = {{BiM-VFI}: Bidirectional Motion Field-Guided Frame Interpolation for Video with Non-uniform Motions},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {7244--7253}
}
SATA: Spatial Autocorrelation Token Analysis for Enhancing the Robustness of Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nikzad_2025_CVPR,
    author    = {Nikzad, Nick and Liao, Yi and Gao, Yongsheng and Zhou, Jun},
    title     = {{SATA}: Spatial Autocorrelation Token Analysis for Enhancing the Robustness of Vision Transformers},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {9730--9739}
}
Nested Diffusion Models Using Hierarchical Latent Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Xiao and Jiang, Ruoxi and Willett, Rebecca and Maire, Michael},
    title     = {Nested Diffusion Models Using Hierarchical Latent Priors},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {2502--2512}
}
A Theory of Learning Unified Model via Knowledge Integration from Label Space Varying Domains-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Dexuan and Westfechtel, Thomas and Harada, Tatsuya},
    title     = {A Theory of Learning Unified Model via Knowledge Integration from Label Space Varying Domains},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {10142--10152}
}
HiLoTs: High-Low Temporal Sensitive Representation Learning for Semi-Supervised LiDAR Segmentation in Autonomous Driving-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR,
    author    = {Lin, R. D. and Weng, Pengcheng and Wang, Yinqiao and Ding, Han and Han, Jinsong and Wang, Fei},
    title     = {{HiLoTs}: High-Low Temporal Sensitive Representation Learning for Semi-Supervised {LiDAR} Segmentation in Autonomous Driving},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {1429--1438}
}
DKDM: Data-Free Knowledge Distillation for Diffusion Models with Any Architecture-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2025_CVPR,
    author    = {Xiang, Qianlong and Zhang, Miao and Shang, Yuzhang and Wu, Jianlong and Yan, Yan and Nie, Liqiang},
    title     = {{DKDM}: Data-Free Knowledge Distillation for Diffusion Models with Any Architecture},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {2955--2965}
}
SymDPO: Boosting In-Context Learning of Large Multimodal Models with Symbol Demonstration Direct Preference Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2025_CVPR,
    author    = {Jia, Hongrui and Jiang, Chaoya and Xu, Haiyang and Ye, Wei and Dong, Mengfan and Yan, Ming and Zhang, Ji and Huang, Fei and Zhang, Shikun},
    title     = {{SymDPO}: Boosting In-Context Learning of Large Multimodal Models with Symbol Demonstration Direct Preference Optimization},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {9361--9371}
}
Debiasing Multimodal Large Language Models via Noise-Aware Preference Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Zefeng and Tang, Hengzhu and Sheng, Jiawei and Zhang, Zhenyu and Ren, Yiming and Li, Zhenyang and Yin, Dawei and Ma, Duohe and Liu, Tingwen},
    title     = {Debiasing Multimodal Large Language Models via Noise-Aware Preference Optimization},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {9423--9433}
}
Feat2GS: Probing Visual Foundation Models with Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Yue and Chen, Xingyu and Chen, Anpei and Pons-Moll, Gerard and Xiu, Yuliang},
    title     = {{Feat2GS}: Probing Visual Foundation Models with {Gaussian} Splatting},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {6348--6361}
}
LSNet: See Large, Focus Small-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Ao and Chen, Hui and Lin, Zijia and Han, Jungong and Ding, Guiguang},
    title     = {{LSNet}: See Large, Focus Small},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {9718--9729}
}
DynamicScaler: Seamless and Scalable Video Generation for Panoramic Scenes-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Jinxiu and Lin, Shaoheng and Li, Yinxiao and Yang, Ming-Hsuan},
    title     = {{DynamicScaler}: Seamless and Scalable Video Generation for Panoramic Scenes},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {6144--6153}
}
DocLayLLM: An Efficient Multi-modal Extension of Large Language Models for Text-rich Document Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_CVPR,
    author    = {Liao, Wenhui and Wang, Jiapeng and Li, Hongliang and Wang, Chengyu and Huang, Jun and Jin, Lianwen},
    title     = {{DocLayLLM}: An Efficient Multi-modal Extension of Large Language Models for Text-rich Document Understanding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {4038--4049}
}
EDEN: Enhanced Diffusion for High-quality Large-motion Video Frame Interpolation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Zihao and Chen, Haoran and Zhao, Haoyu and Lu, Guansong and Fu, Yanwei and Xu, Hang and Wu, Zuxuan},
    title     = {{EDEN}: Enhanced Diffusion for High-quality Large-motion Video Frame Interpolation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {2105--2115}
}
Handling Spatial-Temporal Data Heterogeneity for Federated Continual Learning via Tail Anchor-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR,
    author    = {Yu, Hao and Yang, Xin and Zhang, Le and Gu, Hanlin and Li, Tianrui and Fan, Lixin and Yang, Qiang},
    title     = {Handling Spatial-Temporal Data Heterogeneity for Federated Continual Learning via Tail Anchor},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {4874--4883}
}
DeSiRe-GS: 4D Street Gaussians for Static-Dynamic Decomposition and Surface Reconstruction for Urban Driving Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2025_CVPR,
    author    = {Peng, Chensheng and Zhang, Chengwei and Wang, Yixiao and Xu, Chenfeng and Xie, Yichen and Zheng, Wenzhao and Keutzer, Kurt and Tomizuka, Masayoshi and Zhan, Wei},
    title     = {{DeSiRe-GS}: {4D} Street {Gaussians} for Static-Dynamic Decomposition and Surface Reconstruction for Urban Driving Scenes},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {6782--6791}
}
REWIND: Real-Time Egocentric Whole-Body Motion Diffusion with Exemplar-Based Identity Conditioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR,
    author    = {Lee, Jihyun and Xu, Weipeng and Richard, Alexander and Wei, Shih-En and Saito, Shunsuke and Bai, Shaojie and Wang, Te-Li and Sung, Minhyuk and Kim, Tae-Kyun and Saragih, Jason},
    title     = {{REWIND}: Real-Time Egocentric Whole-Body Motion Diffusion with Exemplar-Based Identity Conditioning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {7095--7104}
}
DoraCycle: Domain-Oriented Adaptation of Unified Generative Model in Multimodal Cycles-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR,
    author    = {Zhao, Rui and Mao, Weijia and Shou, Mike Zheng},
    title     = {{DoraCycle}: Domain-Oriented Adaptation of Unified Generative Model in Multimodal Cycles},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {2835--2846}
}
Gaussian Splashing: Unified Particles for Versatile Motion Synthesis and Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_CVPR,
    author    = {Feng, Yutao and Feng, Xiang and Shang, Yintong and Jiang, Ying and Yu, Chang and Zong, Zeshun and Shao, Tianjia and Wu, Hongzhi and Zhou, Kun and Jiang, Chenfanfu and Yang, Yin},
    title     = {Gaussian Splashing: Unified Particles for Versatile Motion Synthesis and Rendering},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {518--529}
}
Improve Representation for Imbalanced Regression through Geometric Constraints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2025_CVPR,
    author    = {Dong, Zijian and Wu, Yilei and Chen, Chongyao and Zou, Yingtian and Zhang, Yichi and Zhou, Juan Helen},
    title     = {Improve Representation for Imbalanced Regression through Geometric Constraints},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {5082--5091}
}
PartRM: Modeling Part-Level Dynamics with Large Cross-State Reconstruction Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR,
    author    = {Gao, Mingju and Pan, Yike and Gao, Huan-ang and Zhang, Zongzheng and Li, Wenyi and Dong, Hao and Tang, Hao and Yi, Li and Zhao, Hao},
    title     = {{PartRM}: Modeling Part-Level Dynamics with Large Cross-State Reconstruction Model},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {7004--7014}
}
DiffFNO: Diffusion Fourier Neural Operator-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Xiaoyi and Tang, Hao},
    title     = {{DiffFNO}: Diffusion {Fourier} Neural Operator},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {150--160}
}
Zero-Shot Styled Text Image Generation, but Make It Autoregressive-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pippi_2025_CVPR,
    author    = {Pippi, Vittorio and Quattrini, Fabio and Cascianelli, Silvia and Tonioni, Alessio and Cucchiara, Rita},
    title     = {Zero-Shot Styled Text Image Generation, but Make It Autoregressive},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {7910--7919}
}
Leveraging Perturbation Robustness to Enhance Out-of-Distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Wenxi and Yeh, Raymond A. and Mou, Shaoshuai and Gu, Yan},
    title     = {Leveraging Perturbation Robustness to Enhance Out-of-Distribution Detection},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {4724--4733}
}
SALAD: Skeleton-aware Latent Diffusion for Text-driven Motion Generation and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2025_CVPR,
    author    = {Hong, Seokhyeon and Kim, Chaelin and Yoon, Serin and Nam, Junghyun and Cha, Sihun and Noh, Junyong},
    title     = {{SALAD}: Skeleton-aware Latent Diffusion for Text-driven Motion Generation and Editing},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {7158--7168}
}
LookingGlass: Generative Anamorphoses via Laplacian Pyramid Warping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chang_2025_CVPR,
    author    = {Chang, Pascal and Sancho, Sergio and Tang, Jingwei and Gross, Markus and Azevedo, Vinicius},
    title     = {{LookingGlass}: Generative Anamorphoses via {Laplacian} Pyramid Warping},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {24--33}
}
ShowMak3r: Compositional TV Show Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR,
    author    = {Kim, Sangmin and Do, Seunguk and Park, Jaesik},
    title     = {{ShowMak3r}: Compositional {TV} Show Reconstruction},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {864--874}
}
CADRef: Robust Out-of-Distribution Detection via Class-Aware Decoupled Relative Feature Leveraging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ling_2025_CVPR,
    author    = {Ling, Zhiwei and Chang, Yachen and Zhao, Hailiang and Zhao, Xinkui and Chow, Kingsum and Deng, Shuiguang},
    title     = {{CADRef}: Robust Out-of-Distribution Detection via Class-Aware Decoupled Relative Feature Leveraging},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {4968--4977}
}
VideoDirector: Precise Video Editing via Text-to-Video Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Yukun and Wang, Longguang and Ma, Zhiyuan and Hu, Qibin and Xu, Kai and Guo, Yulan},
    title     = {{VideoDirector}: Precise Video Editing via Text-to-Video Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {2589--2598}
}
VISTA: Enhancing Long-Duration and High-Resolution Video Understanding by Video Spatiotemporal Augmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_CVPR,
    author    = {Ren, Weiming and Yang, Huan and Min, Jie and Wei, Cong and Chen, Wenhu},
    title     = {{VISTA}: Enhancing Long-Duration and High-Resolution Video Understanding by Video Spatiotemporal Augmentation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {3804--3814}
}
GA3CE: Unconstrained 3D Gaze Estimation with Gaze-Aware 3D Context Encoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kawana_2025_CVPR,
    author    = {Kawana, Yuki and Shiba, Shintaro and Kong, Quan and Kobori, Norimasa},
    title     = {{GA3CE}: Unconstrained {3D} Gaze Estimation with Gaze-Aware {3D} Context Encoding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {3081--3090}
}
RigGS: Rigging of 3D Gaussians for Modeling Articulated Objects in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2025_CVPR,
    author    = {Yao, Yuxin and Deng, Zhi and Hou, Junhui},
    title     = {{RigGS}: Rigging of {3D} {Gaussians} for Modeling Articulated Objects in Videos},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {5592--5601}
}
Noise Modeling in One Hour: Minimizing Preparation Efforts for Self-supervised Low-Light RAW Image Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
    author    = {Li, Feiran and Jiang, Haiyang and Iso, Daisuke},
    title     = {Noise Modeling in One Hour: Minimizing Preparation Efforts for Self-supervised Low-Light {RAW} Image Denoising},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {5699--5708}
}
High Dynamic Range Video Compression: A Large-Scale Benchmark Dataset and A Learned Bit-depth Scalable Compression Algorithm-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Zhaoyi and Wang, Feifeng and Wang, Shiwei and Zhou, Zihao and Zhu, Yao and Shen, Liquan}, title = {High Dynamic Range Video Compression: A Large-Scale Benchmark Dataset and A Learned Bit-depth Scalable Compression Algorithm}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7320-7330} }
DivPrune: Diversity-based Visual Token Pruning for Large Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alvar_2025_CVPR, author = {Alvar, Saeed Ranjbar and Singh, Gursimran and Akbari, Mohammad and Zhang, Yong}, title = {DivPrune: Diversity-based Visual Token Pruning for Large Multimodal Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9392-9401} }
3D-AVS: LiDAR-based 3D Auto-Vocabulary Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_CVPR, author = {Wei, Weijie and {\"U}lger, Osman and Nejadasl, Fatemeh Karimi and Gevers, Theo and Oswald, Martin R.}, title = {3D-AVS: LiDAR-based 3D Auto-Vocabulary Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8910-8920} }
MEGA: Masked Generative Autoencoder for Human Mesh Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fiche_2025_CVPR, author = {Fiche, Gu{\'e}nol{\'e} and Leglaive, Simon and Alameda-Pineda, Xavier and Moreno-Noguer, Francesc}, title = {MEGA: Masked Generative Autoencoder for Human Mesh Recovery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5366-5378} }
Disentangling Safe and Unsafe Image Corruptions via Anisotropy and Locality-
[pdf]
[supp]
[bibtex]@InProceedings{Muthukumar_2025_CVPR, author = {Muthukumar, Ramchandran and Pal, Ambar and Sulam, Jeremias and Vidal, Rene}, title = {Disentangling Safe and Unsafe Image Corruptions via Anisotropy and Locality}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9954-9963} }
Prometheus: 3D-Aware Latent Diffusion Models for Feed-Forward Text-to-3D Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Yuanbo and Shao, Jiahao and Li, Xinyang and Shen, Yujun and Geiger, Andreas and Liao, Yiyi}, title = {Prometheus: 3D-Aware Latent Diffusion Models for Feed-Forward Text-to-3D Scene Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2857-2869} }
SphereUFormer: A U-Shaped Transformer for Spherical 360 Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Benny_2025_CVPR, author = {Benny, Yaniv and Wolf, Lior}, title = {SphereUFormer: A U-Shaped Transformer for Spherical 360 Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {940-950} }
Beyond Clean Training Data: A Versatile and Model-Agnostic Framework for Out-of-Distribution Detection with Contaminated Training Data-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yuchuan and Kang, Jae-Mo and Kim, Il-Min}, title = {Beyond Clean Training Data: A Versatile and Model-Agnostic Framework for Out-of-Distribution Detection with Contaminated Training Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10183-10192} }
FreeUV: Ground-Truth-Free Realistic Facial UV Texture Recovery via Cross-Assembly Inference Strategy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Xingchao and Taketomi, Takafumi and Endo, Yuki and Kanamori, Yoshihiro}, title = {FreeUV: Ground-Truth-Free Realistic Facial UV Texture Recovery via Cross-Assembly Inference Strategy}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {326-337} }
HarmonySet: A Comprehensive Dataset for Understanding Video-Music Semantic Alignment and Temporal Synchronization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Zitang and Mei, Ke and Lu, Yu and Wang, Tianyi and Rao, Fengyun}, title = {HarmonySet: A Comprehensive Dataset for Understanding Video-Music Semantic Alignment and Temporal Synchronization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3152-3162} }
StyleMaster: Stylize Your Video with Artistic Generation and Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Zixuan and Huang, Huijuan and Wang, Xintao and Wan, Pengfei and Zhang, Di and Luo, Wenhan}, title = {StyleMaster: Stylize Your Video with Artistic Generation and Translation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2630-2640} }
Unsupervised Continual Domain Shift Learning with Multi-Prototype Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Haopeng and Zhang, Yingwei and Xu, Lumin and Jin, Sheng and Luo, Ping and Qian, Chen and Liu, Wentao and Chen, Yiqiang}, title = {Unsupervised Continual Domain Shift Learning with Multi-Prototype Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10131-10141} }
OmniGuard: Hybrid Manipulation Localization via Augmented Versatile Deep Image Watermarking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xuanyu and Tang, Zecheng and Xu, Zhipei and Li, Runyi and Xu, Youmin and Chen, Bin and Gao, Feng and Zhang, Jian}, title = {OmniGuard: Hybrid Manipulation Localization via Augmented Versatile Deep Image Watermarking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3008-3018} }
Open-Canopy: Towards Very High Resolution Forest Monitoring-
[pdf]
[supp]
[bibtex]@InProceedings{Fogel_2025_CVPR, author = {Fogel, Fajwel and Perron, Yohann and Besic, Nikola and Saint-Andr{\'e}, Laurent and Pellissier-Tanon, Agn{\`e}s and Schwartz, Martin and Boudras, Thomas and Fayad, Ibrahim and d'Aspremont, Alexandre and Landrieu, Loic and Ciais, Philippe}, title = {Open-Canopy: Towards Very High Resolution Forest Monitoring}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1395-1406} }
Vision-Language Model IP Protection via Prompt-based Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Lianyu and Wang, Meng and Fu, Huazhu and Zhang, Daoqiang}, title = {Vision-Language Model IP Protection via Prompt-based Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9497-9506} }
Kiss3DGen: Repurposing Image Diffusion Models for 3D Asset Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Jiantao and Yang, Xin and Chen, Meixi and Xu, Yingjie and Yan, Dongyu and Wu, Leyi and Xu, Xinli and Xu, Lie and Zhang, Shunsi and Chen, Ying-Cong}, title = {Kiss3DGen: Repurposing Image Diffusion Models for 3D Asset Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5870-5880} }
Koala-36M: A Large-scale Video Dataset Improving Consistency between Fine-grained Conditions and Video Content-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Qiuheng and Shi, Yukai and Ou, Jiarong and Chen, Rui and Lin, Ke and Wang, Jiahao and Jiang, Boyuan and Yang, Haotian and Zheng, Mingwu and Tao, Xin and Yang, Fei and Wan, Pengfei and Zhang, Di}, title = {Koala-36M: A Large-scale Video Dataset Improving Consistency between Fine-grained Conditions and Video Content}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8428-8437} }
VASparse: Towards Efficient Visual Hallucination Mitigation via Visual-Aware Token Sparsification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuang_2025_CVPR, author = {Zhuang, Xianwei and Zhu, Zhihong and Xie, Yuxin and Liang, Liming and Zou, Yuexian}, title = {VASparse: Towards Efficient Visual Hallucination Mitigation via Visual-Aware Token Sparsification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4189-4199} }
SPARC: Score Prompting and Adaptive Fusion for Zero-Shot Multi-Label Recognition in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miller_2025_CVPR, author = {Miller, Kevin and Gangrade, Aditya and Mishra, Samarth and Saenko, Kate and Saligrama, Venkatesh}, title = {SPARC: Score Prompting and Adaptive Fusion for Zero-Shot Multi-Label Recognition in Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4313-4321} }
Erase Diffusion: Empowering Object Removal Through Calibrating Diffusion Pathways-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yi and Zhou, Hao and Cui, Benlei and Shang, Wenxiang and Lin, Ran}, title = {Erase Diffusion: Empowering Object Removal Through Calibrating Diffusion Pathways}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2418-2427} }
Prompt-CAM: Making Vision Transformers Interpretable for Fine-Grained Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Chowdhury_2025_CVPR, author = {Chowdhury, Arpita and Paul, Dipanjyoti and Mai, Zheda and Gu, Jianyang and Zhang, Ziheng and Mehrab, Kazi Sajeed and Campolongo, Elizabeth G. and Rubenstein, Daniel and Stewart, Charles V. and Karpatne, Anuj and Berger-Wolf, Tanya and Su, Yu and Chao, Wei-Lun}, title = {Prompt-CAM: Making Vision Transformers Interpretable for Fine-Grained Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4375-4385} }
Instruction-based Image Manipulation by Watching How Things Move-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_CVPR, author = {Cao, Mingdeng and Zhang, Xuaner and Zheng, Yinqiang and Xia, Zhihao}, title = {Instruction-based Image Manipulation by Watching How Things Move}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2704-2713} }
Ferret: An Efficient Online Continual Learning Framework under Varying Memory Constraints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Yuhao and Tian, Yuxin and Lv, Jindi and Shi, Mingjia and Li, Yuanxi and Ye, Qing and Zhang, Shuhao and Lv, Jiancheng}, title = {Ferret: An Efficient Online Continual Learning Framework under Varying Memory Constraints}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4850-4861} }
VidComposition: Can MLLMs Analyze Compositions in Compiled Videos?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Yunlong and Guo, Junjia and Hua, Hang and Liang, Susan and Feng, Mingqian and Li, Xinyang and Mao, Rui and Huang, Chao and Bi, Jing and Zhang, Zeliang and Fazli, Pooyan and Xu, Chenliang}, title = {VidComposition: Can MLLMs Analyze Compositions in Compiled Videos?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8490-8500} }
Self-Supervised Learning for Color Spike Camera Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2025_CVPR, author = {Dong, Yanchen and Xiong, Ruiqin and Fan, Xiaopeng and Yu, Zhaofei and Tian, Yonghong and Huang, Tiejun}, title = {Self-Supervised Learning for Color Spike Camera Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6231-6240} }
From Elements to Design: A Layered Approach for Automatic Graphic Design Composition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Jiawei and Sun, Shizhao and Huang, Danqing and Liu, Ting and Li, Ji and Bian, Jiang}, title = {From Elements to Design: A Layered Approach for Automatic Graphic Design Composition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8128-8137} }
SALOVA: Segment-Augmented Long Video Assistant for Targeted Retrieval and Routing in Long-Form Video Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Junho and Kim, Hyunjun and Lee, Hosu and Ro, Yong Man}, title = {SALOVA: Segment-Augmented Long Video Assistant for Targeted Retrieval and Routing in Long-Form Video Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3352-3362} }
DA-VPT: Semantic-Guided Visual Prompt Tuning for Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Ren_2025_CVPR, author = {Ren, Li and Chen, Chen and Wang, Liqiang and Hua, Kien}, title = {DA-VPT: Semantic-Guided Visual Prompt Tuning for Vision Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4353-4363} }
Towards Lossless Implicit Neural Representation via Bit Plane Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Woo Kyoung and Lee, Byeonghun and Cho, Hyunmin and Im, Sunghoon and Jin, Kyong Hwan}, title = {Towards Lossless Implicit Neural Representation via Bit Plane Decomposition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2269-2278} }
iSegMan: Interactive Segment-and-Manipulate 3D Gaussians-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Yian and Xu, Wanshi and Zheng, Ruochong and Qiao, Pengchong and Liu, Chang and Chen, Jie}, title = {iSegMan: Interactive Segment-and-Manipulate 3D Gaussians}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {661-670} }
BlueLM-V-3B: Algorithm and System Co-Design for Multimodal Large Language Models on Mobile Devices-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Xudong and Chen, Yinghao and Chen, Cheng and Tan, Hui and Chen, Boheng and Xie, Yina and Hu, Rui and Tan, Guanxin and Wu, Renshou and Hu, Yan and Zeng, Yi and Wu, Lei and Bian, Liuyang and Wang, Zhaoxiong and Liu, Long and Yang, Yanzhou and Xiao, Han and Zhou, Aojun and Wen, Yafei and Chen, Xiaoxin and Ren, Shuai and Li, Hongsheng}, title = {BlueLM-V-3B: Algorithm and System Co-Design for Multimodal Large Language Models on Mobile Devices}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4145-4155} }
Unraveling Normal Anatomy via Fluid-Driven Anomaly Randomization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Peirong and Aguila, Ana Lawry and Iglesias, Juan E.}, title = {Unraveling Normal Anatomy via Fluid-Driven Anomaly Randomization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10455-10465} }
Taming Teacher Forcing for Masked Autoregressive Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Deyu and Sun, Quan and Peng, Yuang and Yan, Kun and Dong, Runpei and Wang, Duomin and Ge, Zheng and Duan, Nan and Zhang, Xiangyu}, title = {Taming Teacher Forcing for Masked Autoregressive Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7374-7384} }
Revisiting Backdoor Attacks against Large Vision-Language Models from Domain Shift-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Siyuan and Liang, Jiawei and Pang, Tianyu and Du, Chao and Liu, Aishan and Zhu, Mingli and Cao, Xiaochun and Tao, Dacheng}, title = {Revisiting Backdoor Attacks against Large Vision-Language Models from Domain Shift}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9477-9486} }
TCFG: Tangential Damping Classifier-free Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kwon_2025_CVPR, author = {Kwon, Mingi and Kim, Shin seong and Jeong, Jaeseok and Hsiao, Yi Ting and Uh, Youngjung}, title = {TCFG: Tangential Damping Classifier-free Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2620-2629} }
MatAnyone: Stable Video Matting with Consistent Memory Propagation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Peiqing and Zhou, Shangchen and Zhao, Jixin and Tao, Qingyi and Loy, Chen Change}, title = {MatAnyone: Stable Video Matting with Consistent Memory Propagation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7299-7308} }
Molmo and PixMo: Open Weights and Open Data for State-of-the-Art Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deitke_2025_CVPR, author = {Deitke, Matt and Clark, Christopher and Lee, Sangho and Tripathi, Rohun and Yang, Yue and Park, Jae Sung and Salehi, Mohammadreza and Muennighoff, Niklas and Lo, Kyle and Soldaini, Luca and Lu, Jiasen and Anderson, Taira and Bransom, Erin and Ehsani, Kiana and Ngo, Huong and Chen, YenSung and Patel, Ajay and Yatskar, Mark and Callison-Burch, Chris and Head, Andrew and Hendrix, Rose and Bastani, Favyen and VanderBilt, Eli and Lambert, Nathan and Chou, Yvonne and Chheda, Arnavi and Sparks, Jenna and Skjonsberg, Sam and Schmitz, Michael and Sarnat, Aaron and Bischoff, Byron and Walsh, Pete and Newell, Chris and Wolters, Piper and Gupta, Tanmay and Zeng, Kuo-Hao and Borchardt, Jon and Groeneveld, Dirk and Nam, Crystal and Lebrecht, Sophie and Wittlif, Caitlin and Schoenick, Carissa and Michel, Oscar and Krishna, Ranjay and Weihs, Luca and Smith, Noah A. and Hajishirzi, Hannaneh and Girshick, Ross and Farhadi, Ali and Kembhavi, Aniruddha}, title = {Molmo and PixMo: Open Weights and Open Data for State-of-the-Art Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {91-104} }
MMTL-UniAD: A Unified Framework for Multimodal and Multi-Task Learning in Assistive Driving Perception-
[pdf]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Wenzhuo and Wang, Wenshuo and Qiao, Yicheng and Guo, Qiannan and Zhu, Jiayin and Li, Pengfei and Chen, Zilong and Yang, Huiming and Li, Zhiwei and Wang, Lening and Tan, Tiao and Liu, Huaping}, title = {MMTL-UniAD: A Unified Framework for Multimodal and Multi-Task Learning in Assistive Driving Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6864-6874} }
T2V-CompBench: A Comprehensive Benchmark for Compositional Text-to-video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Kaiyue and Huang, Kaiyi and Liu, Xian and Wu, Yue and Xu, Zihan and Li, Zhenguo and Liu, Xihui}, title = {T2V-CompBench: A Comprehensive Benchmark for Compositional Text-to-video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8406-8416} }
Multimodal Autoregressive Pre-training of Large Vision Encoders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fini_2025_CVPR, author = {Fini, Enrico and Shukor, Mustafa and Li, Xiujun and Dufter, Philipp and Klein, Michal and Haldimann, David and Aitharaju, Sai and da Costa, Victor G. Turrisi and B{\'e}thune, Louis and Gan, Zhe and Toshev, Alexander and Eichner, Marcin and Nabi, Moin and Yang, Yinfei and Susskind, Joshua and El-Nouby, Alaaeldin}, title = {Multimodal Autoregressive Pre-training of Large Vision Encoders}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9641-9654} }
AKiRa: Augmentation Kit on Rays for Optical Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xi and Courant, Robin and Christie, Marc and Kalogeiton, Vicky}, title = {AKiRa: Augmentation Kit on Rays for Optical Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2609-2619} }
TFCustom: Customized Image Generation with Time-Aware Frequency Feature Guidance-
[pdf]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Mushui and She, Dong and Pang, Jingxuan and Huang, Qihan and Ying, Jiacheng and He, Wanggui and Hou, Yuanlei and Fu, Siming}, title = {TFCustom: Customized Image Generation with Time-Aware Frequency Feature Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2714-2723} }
SketchFusion: Learning Universal Sketch Features through Fusing Foundation Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Koley_2025_CVPR, author = {Koley, Subhadeep and Dutta, Tapas Kumar and Sain, Aneeshan and Chowdhury, Pinaki Nath and Bhunia, Ayan Kumar and Song, Yi-Zhe}, title = {SketchFusion: Learning Universal Sketch Features through Fusing Foundation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2556-2567} }
Bridging the Vision-Brain Gap with an Uncertainty-Aware Blur Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Haitao and Li, Qing and Zhang, Changqing and He, Zhen and Ying, Xiaomin}, title = {Bridging the Vision-Brain Gap with an Uncertainty-Aware Blur Prior}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2246-2257} }
AffordDP: Generalizable Diffusion Policy with Transferable Affordance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Shijie and Zhu, Yihang and Huang, Yunao and Zhu, Kaizhen and Gu, Jiayuan and Yu, Jingyi and Shi, Ye and Wang, Jingya}, title = {AffordDP: Generalizable Diffusion Policy with Transferable Affordance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6971-6980} }
HMAR: Efficient Hierarchical Masked Auto-Regressive Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Kumbong_2025_CVPR, author = {Kumbong, Hermann and Liu, Xian and Lin, Tsung-Yi and Liu, Ming-Yu and Liu, Xihui and Liu, Ziwei and Fu, Daniel Y. and Re, Christopher and Romero, David W.}, title = {HMAR: Efficient Hierarchical Masked Auto-Regressive Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2535-2544} }
DKC: Differentiated Knowledge Consolidation for Cloth-Hybrid Lifelong Person Re-identification-
[pdf]
[supp]
[bibtex]@InProceedings{Cui_2025_CVPR, author = {Cui, Zhenyu and Zhou, Jiahuan and Peng, Yuxin}, title = {DKC: Differentiated Knowledge Consolidation for Cloth-Hybrid Lifelong Person Re-identification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3573-3582} }
Enhancing Facial Privacy Protection via Weakening Diffusion Purification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Salar_2025_CVPR, author = {Salar, Ali and Liu, Qing and Tian, Yingli and Zhao, Guoying}, title = {Enhancing Facial Privacy Protection via Weakening Diffusion Purification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8235-8244} }
ORIDa: Object-centric Real-world Image Composition Dataset-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Jinwoo and Han, Sangmin and Jeong, Jinho and Choi, Jiwoo and Kim, Dongyeoung and Kim, Seon Joo}, title = {ORIDa: Object-centric Real-world Image Composition Dataset}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3051-3060} }
Image Generation Diversity Issues and How to Tame Them-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dombrowski_2025_CVPR, author = {Dombrowski, Mischa and Zhang, Weitong and Cechnicka, Sarah and Reynaud, Hadrien and Kainz, Bernhard}, title = {Image Generation Diversity Issues and How to Tame Them}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3029-3039} }
Annotation Ambiguity Aware Semi-Supervised Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Kumari_2025_CVPR, author = {Kumari, Suruchi and Singh, Pravendra}, title = {Annotation Ambiguity Aware Semi-Supervised Medical Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10404-10413} }
CAP4D: Creating Animatable 4D Portrait Avatars with Morphable Multi-View Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Taubner_2025_CVPR, author = {Taubner, Felix and Zhang, Ruihang and Tuli, Mathieu and Lindell, David B.}, title = {CAP4D: Creating Animatable 4D Portrait Avatars with Morphable Multi-View Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5318-5330} }
CORE4D: A 4D Human-Object-Human Interaction Dataset for Collaborative Object REarrangement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yun and Zhang, Chengwen and Xing, Ruofan and Tang, Bingda and Yang, Bowen and Yi, Li}, title = {CORE4D: A 4D Human-Object-Human Interaction Dataset for Collaborative Object REarrangement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1769-1782} }
POp-GS: Next Best View in 3D-Gaussian Splatting with P-Optimality-
[pdf]
[supp]
[bibtex]@InProceedings{Wilson_2025_CVPR, author = {Wilson, Joey and Almeida, Marcelino and Mahajan, Sachit and Labrie, Martin and Ghaffari, Maani and Ghasemalizadeh, Omid and Sun, Min and Kuo, Cheng-Hao and Sen, Arnab}, title = {POp-GS: Next Best View in 3D-Gaussian Splatting with P-Optimality}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3646-3655} }
Critic-V: VLM Critics Help Catch VLM Errors in Multimodal Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Di and Lei, Jingdi and Li, Junxian and Wang, Xunzhi and Liu, Yujie and Yang, Zonglin and Li, Jiatong and Wang, Weida and Yang, Suorong and Wu, Jianbo and Ye, Peng and Ouyang, Wanli and Zhou, Dongzhan}, title = {Critic-V: VLM Critics Help Catch VLM Errors in Multimodal Reasoning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9050-9061} }
MaRI: Material Retrieval Integration across Domains-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jianhui and Yang, Zhifei and He, Yangfan and Zhang, Huixiong and Chen, Yuxuan and Huang, Jingwei}, title = {MaRI: Material Retrieval Integration across Domains}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5814-5823} }
Q-Bench-Video: Benchmark the Video Quality Understanding of LMMs-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zicheng and Jia, Ziheng and Wu, Haoning and Li, Chunyi and Chen, Zijian and Zhou, Yingjie and Sun, Wei and Liu, Xiaohong and Min, Xiongkuo and Lin, Weisi and Zhai, Guangtao}, title = {Q-Bench-Video: Benchmark the Video Quality Understanding of LMMs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3229-3239} }
Glossy Object Reconstruction with Cost-effective Polarized Acquisition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Bojian and Peng, Yifan and Hu, Ruizhen and Zhou, Xiaowei}, title = {Glossy Object Reconstruction with Cost-effective Polarized Acquisition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {422-431} }
L-SWAG: Layer-Sample Wise Activation with Gradients Information for Zero-Shot NAS on Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Casarin_2025_CVPR, author = {Casarin, Sofia and Escalera, Sergio and Lanz, Oswald}, title = {L-SWAG: Layer-Sample Wise Activation with Gradients Information for Zero-Shot NAS on Vision Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4441-4451} }
Commonsense Video Question Answering through Video-Grounded Entailment Tree Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Huabin and Ilievski, Filip and Snoek, Cees G. M.}, title = {Commonsense Video Question Answering through Video-Grounded Entailment Tree Reasoning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3262-3271} }
Lifelong Knowledge Editing for Vision Language Models with Low-Rank Mixture-of-Experts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Qizhou and Wang, Chengyu and Wang, Dakan and Zhang, Taolin and Li, Wangyue and He, Xiaofeng}, title = {Lifelong Knowledge Editing for Vision Language Models with Low-Rank Mixture-of-Experts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9455-9466} }
PartGen: Part-level 3D Generation and Reconstruction with Multi-view Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Minghao and Shapovalov, Roman and Laina, Iro and Monnier, Tom and Wang, Jianyuan and Novotny, David and Vedaldi, Andrea}, title = {PartGen: Part-level 3D Generation and Reconstruction with Multi-view Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5881-5892} }
SINR: Sparsity Driven Compressed Implicit Neural Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jayasundara_2025_CVPR, author = {Jayasundara, Dhananjaya and Rajagopalan, Sudarshan and Ranasinghe, Yasiru and Tran, Trac D. and Patel, Vishal M.}, title = {SINR: Sparsity Driven Compressed Implicit Neural Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3061-3070} }
ManipTrans: Efficient Dexterous Bimanual Manipulation Transfer via Residual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Kailin and Li, Puhao and Liu, Tengyu and Li, Yuyang and Huang, Siyuan}, title = {{ManipTrans}: Efficient Dexterous Bimanual Manipulation Transfer via Residual Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6991--7003} }
Shining Yourself: High-Fidelity Ornaments Virtual Try-on with Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miao_2025_CVPR, author = {Miao, Yingmao and Huang, Zhanpeng and Han, Rui and Wang, Zibin and Lin, Chenhao and Shen, Chao}, title = {Shining Yourself: High-Fidelity Ornaments Virtual Try-on with Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {359--368} }
Universal Domain Adaptation for Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choe_2025_CVPR, author = {Choe, Seun-An and Park, Keon-Hee and Choi, Jinwoo and Park, Gyeong-Moon}, title = {Universal Domain Adaptation for Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4607--4617} }
HyperGS: Hyperspectral 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thirgood_2025_CVPR, author = {Thirgood, Christopher and Mendez, Oscar and Ling, Erin and Storey, Jon and Hadfield, Simon}, title = {{HyperGS}: Hyperspectral {3D} {Gaussian} Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5970--5979} }
LMO: Linear Mamba Operator for MRI Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Wei and Jiang, Jiawei and Wu, Jie and Yu, Kaihao and Zheng, Jianwei}, title = {{LMO}: Linear {Mamba} Operator for {MRI} Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5112--5122} }
AnomalyNCD: Towards Novel Anomaly Class Discovery in Industrial Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Ziming and Li, Xurui and Liu, Haotian and Xue, Feng and Wang, Yuzhe and Zhou, Yu}, title = {{AnomalyNCD}: Towards Novel Anomaly Class Discovery in Industrial Scenarios}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4755--4765} }
Your Large Vision-Language Model Only Needs A Few Attention Heads For Visual Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_CVPR, author = {Kang, Seil and Kim, Jinyeong and Kim, Junhyeok and Hwang, Seong Jae}, title = {Your Large Vision-Language Model Only Needs A Few Attention Heads For Visual Grounding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9339--9350} }
Ges3ViG : Incorporating Pointing Gestures into Language-Based 3D Visual Grounding for Embodied Reference Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mane_2025_CVPR, author = {Mane, Atharv Mahesh and Weerakoon, Dulanga and Subbaraju, Vigneshwaran and Sen, Sougata and Sarma, Sanjay E. and Misra, Archan}, title = {{Ges3ViG} : Incorporating Pointing Gestures into Language-Based {3D} Visual Grounding for Embodied Reference Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9017--9026} }
Progressive Focused Transformer for Single Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Long_2025_CVPR, author = {Long, Wei and Zhou, Xingyu and Zhang, Leheng and Gu, Shuhang}, title = {Progressive Focused Transformer for Single Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2279--2288} }
VladVA: Discriminative Fine-tuning of LVLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ouali_2025_CVPR, author = {Ouali, Yassine and Bulat, Adrian and Xenos, Alexandros and Zaganidis, Anestis and Metaxas, Ioannis Maniadis and Martinez, Brais and Tzimiropoulos, Georgios}, title = {{VladVA}: Discriminative Fine-tuning of {LVLMs}}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4101--4111} }
HumanMM: Global Human Motion Recovery from Multi-shot Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yuhong and Wu, Guanlin and Chen, Ling-Hao and Zhao, Zhuokai and Lin, Jing and Jiang, Xiaoke and Wu, Jiamin and Li, Zhuoheng and Yang, Hao Frank and Wang, Haoqian and Zhang, Lei}, title = {{HumanMM}: Global Human Motion Recovery from Multi-shot Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1973--1983} }
Removing Reflections from RAW Photos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kee_2025_CVPR, author = {Kee, Eric and Pikielny, Adam and Blackburn-Matzen, Kevin and Levoy, Marc}, title = {Removing Reflections from {RAW} Photos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {161--171} }
AMR-Transformer: Enabling Efficient Long-range Interaction for Complex Neural Fluid Simulation-
[pdf]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Zeyi and Liu, Jinfan and Chen, Kuangxu and Chen, Ye and Hu, Zhangli and Ni, Bingbing}, title = {{AMR-Transformer}: Enabling Efficient Long-range Interaction for Complex Neural Fluid Simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5804--5813} }
Blurry-Edges: Photon-Limited Depth Estimation from Defocused Boundaries-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Wei and Wagner, Charles James and Luo, Junjie and Guo, Qi}, title = {{Blurry-Edges}: Photon-Limited Depth Estimation from Defocused Boundaries}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {432--441} }
MICAS: Multi-grained In-Context Adaptive Sampling for 3D Point Cloud Processing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2025_CVPR, author = {Shao, Feifei and Liu, Ping and Wang, Zhao and Luo, Yawei and Wang, Hongwei and Xiao, Jun}, title = {{MICAS}: Multi-grained In-Context Adaptive Sampling for {3D} Point Cloud Processing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6616--6626} }
GoalFlow: Goal-Driven Flow Matching for Multimodal Trajectories Generation in End-to-End Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Zebin and Zhang, Xingyu and Hu, Yang and Jiang, Bo and He, Tong and Zhang, Qian and Long, Xiaoxiao and Yin, Wei}, title = {{GoalFlow}: Goal-Driven Flow Matching for Multimodal Trajectories Generation in End-to-End Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1602--1611} }
ColabSfM: Collaborative Structure-from-Motion by Point Cloud Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Edstedt_2025_CVPR, author = {Edstedt, Johan and Mateus, Andr{\'e} and Jaenal, Alberto}, title = {{ColabSfM}: Collaborative Structure-from-Motion by Point Cloud Registration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6573--6583} }
MangaNinja: Line Art Colorization with Precise Reference Following-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhiheng and Cheng, Ka Leong and Chen, Xi and Xiao, Jie and Ouyang, Hao and Zhu, Kai and Liu, Yu and Shen, Yujun and Chen, Qifeng and Luo, Ping}, title = {{MangaNinja}: Line Art Colorization with Precise Reference Following}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5666--5677} }
Nonisotropic Gaussian Diffusion for Realistic 3D Human Motion Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Curreli_2025_CVPR, author = {Curreli, Cecilia and Muhle, Dominik and Saroha, Abhishek and Ye, Zhenzhang and Marin, Riccardo and Cremers, Daniel}, title = {Nonisotropic {Gaussian} Diffusion for Realistic {3D} Human Motion Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1871--1882} }
PICO: Reconstructing 3D People In Contact with Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cseke_2025_CVPR, author = {Cseke, Alp{\'a}r and Tripathi, Shashank and Dwivedi, Sai Kumar and Lakshmipathy, Arjun S. and Chatterjee, Agniv and Black, Michael J. and Tzionas, Dimitrios}, title = {{PICO}: Reconstructing {3D} People In Contact with Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1783--1794} }
Linguistics-aware Masked Image Modeling for Self-supervised Scene Text Recognition-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yifei and Liu, Chang and Wei, Jin and Yang, Xiaomeng and Zhou, Yu and Ma, Can and Ji, Xiangyang}, title = {Linguistics-aware Masked Image Modeling for Self-supervised Scene Text Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9318--9328} }
Scaling up Image Segmentation across Data and Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Pei and Cai, Zhaowei and Yang, Hao and Swaminathan, Ashwin and Manmatha, R. and Soatto, Stefano}, title = {Scaling up Image Segmentation across Data and Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4573--4583} }
Bridging Past and Future: End-to-End Autonomous Driving with Historical Prediction and Planning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Bozhou and Song, Nan and Jin, Xin and Zhang, Li}, title = {Bridging Past and Future: End-to-End Autonomous Driving with Historical Prediction and Planning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6854--6863} }
Blood Flow Speed Estimation with Optical Coherence Tomography Angiography Images-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Wensheng and Li, Zhenghong and Ren, Jiaxiang and Jeong, Hyomin and Du, Congwu and Pan, Yingtian and Ling, Haibin}, title = {Blood Flow Speed Estimation with Optical Coherence Tomography Angiography Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10466--10475} }
DreamTrack: Dreaming the Future for Multimodal Visual Object Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Mingzhe and Tan, Weiping and Ran, Wenyu and Jing, Liping and Zhang, Zhipeng}, title = {{DreamTrack}: Dreaming the Future for Multimodal Visual Object Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7201--7210} }
OmniStyle: Filtering High Quality Style Transfer Data at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Ye and Liu, Ruiqi and Lin, Jiang and Liu, Fei and Yi, Zili and Wang, Yilin and Ma, Rui}, title = {{OmniStyle}: Filtering High Quality Style Transfer Data at Scale}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7847--7856} }
Cross-View Completion Models are Zero-shot Correspondence Estimators-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{An_2025_CVPR, author = {An, Honggyu and Kim, Jin Hyeon and Park, Seonghoon and Jung, Jaewoo and Han, Jisang and Hong, Sunghwan and Kim, Seungryong}, title = {Cross-View Completion Models are Zero-shot Correspondence Estimators}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1103--1115} }
Multi-party Collaborative Attention Control for Image Customization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Han and Yang, Chuanguang and Wang, Qiuli and An, Zhulin and Feng, Weilun and Huang, Libo and Xu, Yongjun}, title = {Multi-party Collaborative Attention Control for Image Customization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7942--7951} }
HOT3D: Hand and Object Tracking in 3D from Egocentric Multi-View Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Banerjee_2025_CVPR, author = {Banerjee, Prithviraj and Shkodrani, Sindi and Moulon, Pierre and Hampali, Shreyas and Han, Shangchen and Zhang, Fan and Zhang, Linguang and Fountain, Jade and Miller, Edward and Basol, Selen and Newcombe, Richard and Wang, Robert and Engel, Jakob Julian and Hodan, Tomas}, title = {{HOT3D}: Hand and Object Tracking in {3D} from Egocentric Multi-View Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7061--7071} }
DELT: A Simple Diversity-driven EarlyLate Training for Dataset Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Zhiqiang and Sherif, Ammar and Yin, Zeyuan and Shao, Shitong}, title = {{DELT}: A Simple Diversity-driven {EarlyLate} Training for Dataset Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4797--4806} }
RoboBrain: A Unified Brain Model for Robotic Manipulation from Abstract to Concrete-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2025_CVPR, author = {Ji, Yuheng and Tan, Huajie and Shi, Jiayu and Hao, Xiaoshuai and Zhang, Yuan and Zhang, Hengyuan and Wang, Pengwei and Zhao, Mengdi and Mu, Yao and An, Pengju and Xue, Xinda and Su, Qinghang and Lyu, Huaihai and Zheng, Xiaolong and Liu, Jiaming and Wang, Zhongyuan and Zhang, Shanghang}, title = {{RoboBrain}: A Unified Brain Model for Robotic Manipulation from Abstract to Concrete}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1724--1734} }
Beyond Image Classification: A Video Benchmark and Dual-Branch Hybrid Discrimination Framework for Compositional Zero-Shot Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Dongyao and Jing, Haodong and Ma, Yongqiang and Zheng, Nanning}, title = {Beyond Image Classification: A Video Benchmark and Dual-Branch Hybrid Discrimination Framework for Compositional Zero-Shot Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9860--9869} }
ABBSPO: Adaptive Bounding Box Scaling and Symmetric Prior based Orientation Prediction for Detecting Aerial Image Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Woojin and Chang, Hyugjae and Moon, Jaeho and Lee, Jaehyup and Kim, Munchurl}, title = {{ABBSPO}: Adaptive Bounding Box Scaling and Symmetric Prior based Orientation Prediction for Detecting Aerial Image Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8848--8858} }
Do Your Best and Get Enough Rest for Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_CVPR, author = {Kang, Hankyul and Seifer, Gregor and Lee, Donghyun and Ryu, Jongbin}, title = {Do Your Best and Get Enough Rest for Continual Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10077--10086} }
Enhancing Few-Shot Class-Incremental Learning via Training-Free Bi-Level Modality Calibration-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Yiyang and Ding, Tianyu and Wang, Lei and Huo, Jing and Gao, Yang and Li, Wenbin}, title = {Enhancing Few-Shot Class-Incremental Learning via Training-Free Bi-Level Modality Calibration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9881--9890} }
MUSt3R: Multi-view Network for Stereo 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cabon_2025_CVPR, author = {Cabon, Yohann and Stoffl, Lucas and Antsfeld, Leonid and Csurka, Gabriela and Chidlovskii, Boris and Revaud, Jerome and Leroy, Vincent}, title = {{MUSt3R}: Multi-view Network for Stereo {3D} Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1050--1060} }
Hybrid-Level Instruction Injection for Video Token Compression in Multi-modal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhihang and Xie, Chen-Wei and Li, Pandeng and Zhao, Liming and Tang, Longxiang and Zheng, Yun and Liu, Chuanbin and Xie, Hongtao}, title = {Hybrid-Level Instruction Injection for Video Token Compression in Multi-modal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8568--8578} }
A New Statistical Model of Star Speckles for Learning to Detect and Characterize Exoplanets in Direct Imaging Observations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bodrito_2025_CVPR, author = {Bodrito, Th{\'e}o and Flasseur, Olivier and Mairal, Julien and Ponce, Jean and Langlois, Maud and Lagrange, Anne-Marie}, title = {A New Statistical Model of Star Speckles for Learning to Detect and Characterize Exoplanets in Direct Imaging Observations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1230--1240} }
CoT-VLA: Visual Chain-of-Thought Reasoning for Vision-Language-Action Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Qingqing and Lu, Yao and Kim, Moo Jin and Fu, Zipeng and Zhang, Zhuoyang and Wu, Yecheng and Li, Zhaoshuo and Ma, Qianli and Han, Song and Finn, Chelsea and Handa, Ankur and Lin, Tsung-Yi and Wetzstein, Gordon and Liu, Ming-Yu and Xiang, Donglai}, title = {{CoT-VLA}: Visual Chain-of-Thought Reasoning for Vision-Language-Action Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1702--1713} }
WAVE: Weight Templates for Adaptive Initialization of Variable-sized Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_CVPR, author = {Feng, Fu and Xie, Yucheng and Wang, Jing and Geng, Xin}, title = {{WAVE}: Weight Templates for Adaptive Initialization of Variable-sized Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4819--4828} }
CXPMRG-Bench: Pre-training and Benchmarking for X-ray Medical Report Generation on CheXpert Plus Dataset-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xiao and Wang, Fuling and Li, Yuehang and Ma, Qingchuan and Wang, Shiao and Jiang, Bo and Tang, Jin}, title = {{CXPMRG-Bench}: Pre-training and Benchmarking for {X-ray} Medical Report Generation on {CheXpert Plus} Dataset}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5123--5133} }
Event-Equalized Dense Video Captioning-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Kangyi and Li, Pengna and Fu, Jingwen and Li, Yizhe and Wu, Yang and Liu, Yuhan and Wang, Jinjun and Zhou, Sanping}, title = {Event-Equalized Dense Video Captioning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8417--8427} }
EDCFlow: Exploring Temporally Dense Difference Maps for Event-based Optical Flow Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Daikun and Cheng, Lei and Wang, Teng and Sun, Changyin}, title = {{EDCFlow}: Exploring Temporally Dense Difference Maps for Event-based Optical Flow Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1984--1993} }
LibraGrad: Balancing Gradient Flow for Universally Better Vision Transformer Attributions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mehri_2025_CVPR, author = {Mehri, Faridoun and Baghshah, Mahdieh Soleymani and Pilehvar, Mohammad Taher}, title = {{LibraGrad}: Balancing Gradient Flow for Universally Better Vision Transformer Attributions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {67--78} }
Lost in Translation, Found in Context: Sign Language Translation with Contextual Cues-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2025_CVPR, author = {Jang, Youngjoon and Raajesh, Haran and Momeni, Liliane and Varol, G{\"u}l and Zisserman, Andrew}, title = {Lost in Translation, Found in Context: Sign Language Translation with Contextual Cues}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8742--8752} }
Synchronized Video-to-Audio Generation via Mel Quantization-Continuum Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Juncheng and Xu, Chao and Yu, Cheng and Shang, Lei and Hu, Zhe and Wang, Shujun and Bo, Liefeng}, title = {Synchronized Video-to-Audio Generation via Mel Quantization-Continuum Decomposition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {3111--3120} }
FATE: Full-head Gaussian Avatar with Textural Editing from Monocular Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jiawei and Wu, Zijian and Liang, Zhiyang and Gong, Yicheng and Hu, Dongfang and Yao, Yao and Cao, Xun and Zhu, Hao}, title = {{FATE}: Full-head {Gaussian} Avatar with Textural Editing from Monocular Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5535--5545} }
Touch2Shape: Touch-Conditioned 3D Diffusion for Shape Exploration and Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuanbo and Zhang, Zhaoxuan and Qiu, Jiajin and Sun, Dilong and Meng, Zhengyu and Wei, Xiaopeng and Yang, Xin}, title = {{Touch2Shape}: Touch-Conditioned {3D} Diffusion for Shape Exploration and Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5656--5665} }
VITED: Video Temporal Evidence Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Yujie and Song, Yale and Wang, William and Torresani, Lorenzo and Nagarajan, Tushar}, title = {{VITED}: Video Temporal Evidence Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8501--8511} }
Temporal Score Analysis for Understanding and Correcting Diffusion Artifacts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_CVPR, author = {Cao, Yu and Zhao, Zengqun and Patras, Ioannis and Gong, Shaogang}, title = {Temporal Score Analysis for Understanding and Correcting Diffusion Artifacts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7707--7716} }
Go-with-the-Flow: Motion-Controllable Video Diffusion Models Using Real-Time Warped Noise-
[pdf]
[supp]
[bibtex]@InProceedings{Burgert_2025_CVPR, author = {Burgert, Ryan and Xu, Yuancheng and Xian, Wenqi and Pilarski, Oliver and Clausen, Pascal and He, Mingming and Ma, Li and Deng, Yitong and Li, Lingxiao and Mousavi, Mohsen and Ryoo, Michael and Debevec, Paul and Yu, Ning}, title = {{Go-with-the-Flow}: Motion-Controllable Video Diffusion Models Using Real-Time Warped Noise}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13--23} }
Vision-Language Gradient Descent-driven All-in-One Deep Unfolding Networks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_CVPR, author = {Zeng, Haijin and Wang, Xiangming and Chen, Yongyong and Su, Jingyong and Liu, Jie}, title = {Vision-Language Gradient Descent-driven All-in-One Deep Unfolding Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7524--7533} }
3D-LLaVA: Towards Generalist 3D LMMs with Omni Superpoint Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2025_CVPR, author = {Deng, Jiajun and He, Tianyu and Jiang, Li and Wang, Tianyu and Dayoub, Feras and Reid, Ian}, title = {{3D-LLaVA}: Towards Generalist {3D} {LMMs} with Omni Superpoint Transformer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {3772--3782} }
Boost the Inference with Co-training: A Depth-guided Mutual Learning Framework for Semi-supervised Medical Polyp Segmentation-
[pdf]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Yuxin and Zhu, Zihao and Zhang, Yuxiang and Chen, Yifan and Yu, Zhibin}, title = {Boost the Inference with Co-training: A Depth-guided Mutual Learning Framework for Semi-supervised Medical Polyp Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10394--10403} }
From Laboratory to Real World: A New Benchmark Towards Privacy-Preserved Visible-Infrared Person Re-Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Yan and Yu, Hao and Cheng, Xu and Chen, Haoyu and Sun, Zhaodong and Zhao, Guoying}, title = {From Laboratory to Real World: A New Benchmark Towards Privacy-Preserved Visible-Infrared Person Re-Identification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8828--8837} }
4Deform: Neural Surface Deformation for Robust Shape Interpolation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sang_2025_CVPR, author = {Sang, Lu and Canfes, Zehranaz and Cao, Dongliang and Marin, Riccardo and Bernard, Florian and Cremers, Daniel}, title = {{4Deform}: Neural Surface Deformation for Robust Shape Interpolation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6542--6551} }
Dense Match Summarization for Faster Two-view Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Astermark_2025_CVPR, author = {Astermark, Jonathan and Heyden, Anders and Larsson, Viktor}, title = {Dense Match Summarization for Faster Two-view Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1093--1102} }
Align-A-Video: Deterministic Reward Tuning of Image Diffusion Models for Consistent Video Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Shengzhi and Zhong, Yingkang and Mu, Jiangchuan and Wu, Kai and Xiong, Mingliang and Fang, Wen and Liu, Mingqing and Deng, Hao and He, Bin and Li, Gang and Liu, Qingwen}, title = {{Align-A-Video}: Deterministic Reward Tuning of Image Diffusion Models for Consistent Video Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2074--2083} }
LION-FS: Fast & Slow Video-Language Thinker as Online Video Assistant-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Wei and Hu, Bing and Shao, Rui and Shen, Leyang and Nie, Liqiang}, title = {{LION-FS}: Fast \& Slow Video-Language Thinker as Online Video Assistant}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {3240--3251} }
Motion-Grounded Video Reasoning: Understanding and Perceiving Motion at Pixel Level-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2025_CVPR, author = {Deng, Andong and Chen, Tongjia and Yu, Shoubin and Yang, Taojiannan and Spencer, Lincoln and Tian, Yapeng and Mian, Ajmal Saeed and Bansal, Mohit and Chen, Chen}, title = {Motion-Grounded Video Reasoning: Understanding and Perceiving Motion at Pixel Level}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8625--8636} }
Toward Robust Neural Reconstruction from Sparse Point Sets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ouasfi_2025_CVPR, author = {Ouasfi, Amine and Jena, Shubhendu and Marchand, Eric and Boukhayma, Adnane}, title = {Toward Robust Neural Reconstruction from Sparse Point Sets}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6552--6562} }
GPAvatar: High-fidelity Head Avatars by Learning Efficient Gaussian Projections-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2025_CVPR, author = {Feng, Wei-Qi and Han, Dong and Zhou, Ze-Kang and Li, Shunkai and Liu, Xiaoqiang and Wan, Pengfei and Zhang, Di and Wang, Miao}, title = {{GPAvatar}: High-fidelity Head Avatars by Learning Efficient {Gaussian} Projections}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {250--259} }
PIAD: Pose and Illumination agnostic Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Kaichen and Cao, Junjie and Bai, Zeyu and Su, Zhixun and Tagliasacchi, Andrea}, title = {{PIAD}: Pose and Illumination agnostic Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4734--4743} }
Two is Better than One: Efficient Ensemble Defense for Robust and Compact Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2025_CVPR, author = {Jung, Yoojin and Song, Byung Cheol}, title = {Two is Better than One: Efficient Ensemble Defense for Robust and Compact Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9696--9706} }
Tiled Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Madar_2025_CVPR, author = {Madar, Or and Fried, Ohad}, title = {Tiled Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7795--7804} }
Descriptor-In-Pixel : Point-Feature Tracking For Pixel Processor Arrays-
[pdf]
[supp]
[bibtex]@InProceedings{Bose_2025_CVPR, author = {Bose, Laurie and Chen, Jianing and Dudek, Piotr}, title = {{Descriptor-In-Pixel} : Point-Feature Tracking For Pixel Processor Arrays}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5392--5400} }
UVGS: Reimagining Unstructured 3D Gaussian Splatting using UV Mapping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rai_2025_CVPR,
  author    = {Rai, Aashish and Wang, Dilin and Jain, Mihir and Sarafianos, Nikolaos and Chen, Kefan and Sridhar, Srinath and Prakash, Aayush},
  title     = {{UVGS}: Reimagining Unstructured {3D} {Gaussian} Splatting using {UV} Mapping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5927--5937},
}
InterAct: Advancing Large-Scale Versatile 3D Human-Object Interaction Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Sirui and Li, Dongting and Zhang, Yucheng and Xu, Xiyan and Long, Qi and Wang, Ziyin and Lu, Yunzhi and Dong, Shuchang and Jiang, Hezi and Gupta, Akshat and Wang, Yu-Xiong and Gui, Liang-Yan},
  title     = {{InterAct}: Advancing Large-Scale Versatile {3D} Human-Object Interaction Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7048--7060},
}
TAMT: Temporal-Aware Model Tuning for Cross-Domain Few-Shot Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yilong and Gao, Zilin and Wang, Qilong and Chen, Zhaofeng and Li, Peihua and Hu, Qinghua},
  title     = {{TAMT}: Temporal-Aware Model Tuning for Cross-Domain Few-Shot Action Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3449--3459},
}
BioX-CPath: Biologically-driven Explainable Diagnostics for Multistain IHC Computational Pathology-
[pdf]
[supp]
[bibtex]@InProceedings{Gallagher-Syed_2025_CVPR,
  author    = {Gallagher-Syed, Amaya and Senior, Henry and Alwazzan, Omnia and Pontarini, Elena and Bombardieri, Michele and Pitzalis, Costantino and Lewis, Myles J. and Barnes, Michael R. and Rossi, Luca and Slabaugh, Gregory},
  title     = {{BioX-CPath}: Biologically-driven Explainable Diagnostics for Multistain {IHC} Computational Pathology},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10372--10383},
}
GauCho: Gaussian Distributions with Cholesky Decomposition for Oriented Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Marques_2025_CVPR,
  author    = {Marques, Jos{\'e} Henrique Lima and Murrugarra-Llerena, Jeffri and Jung, Claudio R.},
  title     = {{GauCho}: {Gaussian} Distributions with {Cholesky} Decomposition for Oriented Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3593--3602},
}
No Thing, Nothing: Highlighting Safety-Critical Classes for Robust LiDAR Semantic Segmentation in Adverse Weather-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_CVPR,
  author    = {Park, Junsung and Lee, Hwijeong and Kang, Inha and Shim, Hyunjung},
  title     = {No Thing, Nothing: Highlighting Safety-Critical Classes for Robust {LiDAR} Semantic Segmentation in Adverse Weather},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6690--6699},
}
Mind the Gap: Detecting Black-box Adversarial Attacks in the Making through Query Update Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_CVPR,
  author    = {Park, Jeonghwan and McLaughlin, Niall and Alouani, Ihsen},
  title     = {Mind the Gap: Detecting Black-box Adversarial Attacks in the Making through Query Update Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10235--10243},
}
GaussianWorld: Gaussian World Model for Streaming 3D Occupancy Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zuo_2025_CVPR,
  author    = {Zuo, Sicheng and Zheng, Wenzhao and Huang, Yuanhui and Zhou, Jie and Lu, Jiwen},
  title     = {{GaussianWorld}: {Gaussian} World Model for Streaming {3D} Occupancy Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6772--6781},
}
ICP: Immediate Compensation Pruning for Mid-to-high Sparsity-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Xin and Fu, Xueming and Jiang, Zihang and Zhou, S. Kevin},
  title     = {{ICP}: Immediate Compensation Pruning for Mid-to-high Sparsity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9487--9496},
}
VinaBench: Benchmark for Faithful and Consistent Visual Narratives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Silin and Mathew, Sheryl and Mi, Li and Mamooler, Sepideh and Zhao, Mengjie and Wakaki, Hiromi and Mitsufuji, Yuki and Montariol, Syrielle and Bosselut, Antoine},
  title     = {{VinaBench}: Benchmark for Faithful and Consistent Visual Narratives},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2870--2879},
}
Dual Diffusion for Unified Image Generation and Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Zijie and Li, Henry and Shi, Yichun and Farimani, Amir Barati and Kluger, Yuval and Yang, Linjie and Wang, Peng},
  title     = {Dual Diffusion for Unified Image Generation and Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2779--2790},
}
WeakMCN: Multi-task Collaborative Network for Weakly Supervised Referring Expression Comprehension and Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Silin and Liu, Yang and He, Xinwei and Ourselin, Sebastien and Tan, Lei and Luo, Gen},
  title     = {{WeakMCN}: Multi-task Collaborative Network for Weakly Supervised Referring Expression Comprehension and Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9175--9185},
}
4DGC: Rate-Aware 4D Gaussian Compression for Efficient Streamable Free-Viewpoint Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Qiang and Zheng, Zihan and Zhong, Houqiang and Fu, Sihua and Song, Li and Zhang, Xiaoyun and Zhai, Guangtao and Wang, Yanfeng},
  title     = {{4DGC}: Rate-Aware {4D} {Gaussian} Compression for Efficient Streamable Free-Viewpoint Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {875--885},
}
GASP: Gaussian Avatars with Synthetic Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saunders_2025_CVPR,
  author    = {Saunders, Jack and Hewitt, Charlie and Jian, Yanan and Kowalski, Marek and Baltrusaitis, Tadas and Chen, Yiye and Cosker, Darren and Estellers, Virginia and Gyd{\'e}, Nicholas and Namboodiri, Vinay P. and Lundell, Benjamin E.},
  title     = {{GASP}: {Gaussian} Avatars with Synthetic Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {271--280},
}
COSMIC: Clique-Oriented Semantic Multi-space Integration for Robust CLIP Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Fanding and Jiang, Jingyan and Jiang, Qinting and Li, Hebei and Khan, Faisal Nadeem and Wang, Zhi},
  title     = {{COSMIC}: Clique-Oriented Semantic Multi-space Integration for Robust {CLIP} Test-Time Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9772--9781},
}
High-quality Point Cloud Oriented Normal Estimation via Hybrid Angular and Euclidean Distance Encoding-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Yuanqi and Huang, Jingcheng and Wang, Hongshen and Lv, Peiyuan and Liu, Yansong and Zheng, Jiuming and Guo, Jie and Guo, Yanwen},
  title     = {High-quality Point Cloud Oriented Normal Estimation via Hybrid Angular and {Euclidean} Distance Encoding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1287--1296},
}
Prior-free 3D Object Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2025_CVPR,
  author    = {Song, Xiuqiang and Jin, Li and Zhang, Zhengxian and Li, Jiachen and Zhong, Fan and Zhang, Guofeng and Qin, Xueying},
  title     = {Prior-free {3D} Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1200--1209},
}
Progressive Correspondence Regenerator for Robust 3D Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Guiyu and Ao, Sheng and Zhang, Ye and Xu, Kai and Guo, Yulan},
  title     = {Progressive Correspondence Regenerator for Robust {3D} Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1210--1219},
}
Cross-Modal 3D Representation with Multi-View Images and Point Clouds-
[pdf]
[bibtex]@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Ziyang and Wang, Pinghui and Liang, Zi and Bai, Haitao and Zhang, Ruofei},
  title     = {Cross-Modal {3D} Representation with Multi-View Images and Point Clouds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3728--3739},
}
Decompositional Neural Scene Reconstruction with Generative Diffusion Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2025_CVPR,
  author    = {Ni, Junfeng and Liu, Yu and Lu, Ruijie and Zhou, Zirui and Zhu, Song-Chun and Chen, Yixin and Huang, Siyuan},
  title     = {Decompositional Neural Scene Reconstruction with Generative Diffusion Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6022--6033},
}
Learning Visual Generative Priors without Text-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Shuailei and Zheng, Kecheng and Wei, Ying and Wu, Wei and Lu, Fan and Zhang, Yifei and Xie, Chen-Wei and Gong, Biao and Zhu, Jiapeng and Shen, Yujun},
  title     = {Learning Visual Generative Priors without Text},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8051--8061},
}
Lifting the Veil on Visual Information Flow in MLLMs: Unlocking Pathways to Faster Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Hao and Si, Guangzong and Wang, Zilei},
  title     = {Lifting the Veil on Visual Information Flow in {MLLMs}: Unlocking Pathways to Faster Inference},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9382--9391},
}
Pixel-level and Semantic-level Adjustable Super-resolution: A Dual-LoRA Approach-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Lingchen and Wu, Rongyuan and Ma, Zhiyuan and Liu, Shuaizheng and Yi, Qiaosi and Zhang, Lei},
  title     = {Pixel-level and Semantic-level Adjustable Super-resolution: A {Dual-LoRA} Approach},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2333--2343},
}
Mr. DETR: Instructive Multi-Route Training for Detection Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Chang-Bin and Zhong, Yujie and Han, Kai},
  title     = {Mr. {DETR}: Instructive Multi-Route Training for Detection Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9933--9943},
}
Hearing Hands: Generating Sounds from Physical Interactions in 3D Scenes-
[pdf]
[bibtex]@InProceedings{Dou_2025_CVPR,
  author    = {Dou, Yiming and Oh, Wonseok and Luo, Yuqing and Loquercio, Antonio and Owens, Andrew},
  title     = {Hearing Hands: Generating Sounds from Physical Interactions in {3D} Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1795--1804},
}
AirRoom: Objects Matter in Room Reidentification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2025_CVPR,
  author    = {Yao, Runmao and Du, Yi and Chen, Zhuoqun and Zheng, Haoze and Wang, Chen},
  title     = {{AirRoom}: Objects Matter in Room Reidentification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1385--1394},
}
DefMamba: Deformable Visual State Space Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Leiye and Zhang, Miao and Yin, Jihao and Liu, Tingwei and Ji, Wei and Piao, Yongri and Lu, Huchuan},
  title     = {{DefMamba}: Deformable Visual State Space Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8838--8847},
}
HOP: Heterogeneous Topology-based Multimodal Entanglement for Co-Speech Gesture Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Hongye and Wang, Tianyu and Shi, Guangsi and Zhao, Zexing and Fu, Yanwei},
  title     = {{HOP}: Heterogeneous Topology-based Multimodal Entanglement for Co-Speech Gesture Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {906--916},
}
VoxelSplat: Dynamic Gaussian Splatting as an Effective Loss for Occupancy and Flow Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Ziyue and Wang, Shenlong and Xie, Jin and Liu, Jiang-jiang and Wang, Jingdong and Yang, Jian},
  title     = {{VoxelSplat}: Dynamic {Gaussian} Splatting as an Effective Loss for Occupancy and Flow Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6761--6771},
}
ControlFace: Harnessing Facial Parametric Control for Face Rigging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2025_CVPR,
  author    = {Jang, Wooseok and Hong, Youngjun and Cha, Geonho and Kim, Seungryong},
  title     = {{ControlFace}: Harnessing Facial Parametric Control for Face Rigging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5614--5624},
}
Imputation-free and Alignment-free: Incomplete Multi-view Clustering Driven by Consensus Semantic Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2025_CVPR,
  author    = {Dai, Yuzhuo and Jin, Jiaqi and Dong, Zhibin and Wang, Siwei and Liu, Xinwang and Zhu, En and Yang, Xihong and Gan, Xinbiao and Feng, Yu},
  title     = {Imputation-free and Alignment-free: Incomplete Multi-view Clustering Driven by Consensus Semantic Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5071--5081},
}
Sensitivity-Aware Efficient Fine-Tuning via Compact Dynamic-Rank Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Tianran and Chen, Jiarui and Zhang, Baoquan and Yu, Zhehao and Chen, Shidong and Ye, Rui and Li, Xutao and Ye, Yunming},
  title     = {Sensitivity-Aware Efficient Fine-Tuning via Compact Dynamic-Rank Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9655--9664},
}
A Selective Re-learning Mechanism for Hyperspectral Fusion Imaging-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yuanye and Liu, Jinyang and Dian, Renwei and Li, Shutao},
  title     = {A Selective Re-learning Mechanism for Hyperspectral Fusion Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7437--7446},
}
Autoregressive Sequential Pretraining for Visual Tracking-
[pdf]
[bibtex]@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Shiyi and Bai, Yifan and Gong, Yihong and Wei, Xing},
  title     = {Autoregressive Sequential Pretraining for Visual Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7254--7264},
}
PromptHMR: Promptable Human Mesh Recovery-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yufu and Sun, Yu and Patel, Priyanka and Daniilidis, Kostas and Black, Michael J. and Kocabas, Muhammed},
  title     = {{PromptHMR}: Promptable Human Mesh Recovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1148--1159},
}
VISTREAM: Improving Computation Efficiency of Visual Streaming Perception via Law-of-Charge-Conservation Inspired Spiking Neural Network-
[pdf]
[supp]
[bibtex]@InProceedings{You_2025_CVPR,
  author    = {You, Kang and Wei, Ziling and Yan, Jing and Zhang, Boning and Guo, Qinghai and Zhang, Yaoyu and He, Zhezhi},
  title     = {{VISTREAM}: Improving Computation Efficiency of Visual Streaming Perception via Law-of-Charge-Conservation Inspired Spiking Neural Network},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8796--8805},
}
STPro: Spatial and Temporal Progressive Learning for Weakly Supervised Spatio-Temporal Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Garg_2025_CVPR,
  author    = {Garg, Aaryan and Kumar, Akash and Rawat, Yogesh S},
  title     = {{STPro}: Spatial and Temporal Progressive Learning for Weakly Supervised Spatio-Temporal Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3384--3394},
}
Rashomon Sets for Prototypical-Part Networks: Editing Interpretable Models in Real-Time-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Donnelly_2025_CVPR,
  author    = {Donnelly, Jon and Guo, Zhicheng and Barnett, Alina Jade and McTavish, Hayden and Chen, Chaofan and Rudin, Cynthia},
  title     = {{Rashomon} Sets for Prototypical-Part Networks: Editing Interpretable Models in Real-Time},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4528--4538},
}
EnvPoser: Environment-aware Realistic Human Motion Estimation from Sparse Observations with Uncertainty Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_CVPR,
  author    = {Xia, Songpengcheng and Zhang, Yu and Su, Zhuo and Zheng, Xiaozheng and Lv, Zheng and Wang, Guidong and Zhang, Yongjie and Wu, Qi and Chu, Lei and Pei, Ling},
  title     = {{EnvPoser}: Environment-aware Realistic Human Motion Estimation from Sparse Observations with Uncertainty Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1839--1849},
}
Tuning the Frequencies: Robust Training for Sinusoidal Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Novello_2025_CVPR,
  author    = {Novello, Tiago and Aldana, Diana and Araujo, Andre and Velho, Luiz},
  title     = {Tuning the Frequencies: Robust Training for Sinusoidal Neural Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3071--3080},
}
Real-time Free-view Human Rendering from Sparse-view RGB Videos using Double Unprojected Textures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Guoxing and Dabral, Rishabh and Zhu, Heming and Fua, Pascal and Theobalt, Christian and Habermann, Marc},
  title     = {Real-time Free-view Human Rendering from Sparse-view {RGB} Videos using Double Unprojected Textures},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {562--573},
}
Large Self-Supervised Models Bridge the Gap in Domain Adaptive Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lavoie_2025_CVPR,
  author    = {Lavoie, Marc-Antoine and Mahmoud, Anas and Waslander, Steven L.},
  title     = {Large Self-Supervised Models Bridge the Gap in Domain Adaptive Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4692--4702},
}
Evaluating Model Perception of Color Illusions in Photorealistic Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2025_CVPR,
  author    = {Mao, Lingjun and Tang, Zineng and Suhr, Alane},
  title     = {Evaluating Model Perception of Color Illusions in Photorealistic Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7805--7814},
}
Do Visual Imaginations Improve Vision-and-Language Navigation Agents?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Perincherry_2025_CVPR,
  author    = {Perincherry, Akhil and Krantz, Jacob and Lee, Stefan},
  title     = {Do Visual Imaginations Improve Vision-and-Language Navigation Agents?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3846--3855},
}
HotSpot: Signed Distance Function Optimization with an Asymptotically Sufficient Condition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zimo and Wang, Cheng and Yoshino, Taiki and Tao, Sirui and Fu, Ziyang and Li, Tzu-Mao},
  title     = {{HotSpot}: Signed Distance Function Optimization with an Asymptotically Sufficient Condition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1276--1286},
}
Libra-Merging: Importance-redundancy and Pruning-merging Trade-off for Acceleration Plug-in in Large Vision-Language Model-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Longrong and Shen, Dong and Cai, Chaoxiang and Chen, Kaibing and Yang, Fan and Gao, Tingting and Zhang, Di and Li, Xi},
  title     = {{Libra-Merging}: Importance-redundancy and Pruning-merging Trade-off for Acceleration Plug-in in Large Vision-Language Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9402--9412},
}
LEDiff: Latent Exposure Diffusion for HDR Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Chao and Xia, Zhihao and Leimkuhler, Thomas and Myszkowski, Karol and Zhang, Xuaner},
  title     = {{LEDiff}: Latent Exposure Diffusion for {HDR} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {453--464},
}
VideoAutoArena: An Automated Arena for Evaluating Large Multimodal Models in Video Analysis through User Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Ziyang and Wu, Haoning and Li, Dongxu and Ma, Jing and Kankanhalli, Mohan and Li, Junnan},
  title     = {{VideoAutoArena}: An Automated Arena for Evaluating Large Multimodal Models in Video Analysis through User Simulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8461--8474},
}
Zero-Shot Novel View and Depth Synthesis with Multi-View Geometric Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guizilini_2025_CVPR,
  author    = {Guizilini, Vitor and Irshad, Muhammad Zubair and Chen, Dian and Shakhnarovich, Greg and Ambrus, Rares},
  title     = {Zero-Shot Novel View and Depth Synthesis with Multi-View Geometric Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {764--776},
}
Unveiling Visual Perception in Language Models: An Attention Head Analysis Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bi_2025_CVPR,
  author    = {Bi, Jing and Guo, Junjia and Tang, Yunlong and Wen, Lianggong Bruce and Liu, Zhang and Wang, Bingjie and Xu, Chenliang},
  title     = {Unveiling Visual Perception in Language Models: An Attention Head Analysis Approach},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4135--4144},
}
SemiDAViL: Semi-supervised Domain Adaptation with Vision-Language Guidance for Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Basak_2025_CVPR,
  author    = {Basak, Hritam and Yin, Zhaozheng},
  title     = {{SemiDAViL}: Semi-supervised Domain Adaptation with Vision-Language Guidance for Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9816--9828},
}
dFLMoE: Decentralized Federated Learning via Mixture of Experts for Medical Data Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Luyuan and Luan, Tianyu and Cai, Wenyuan and Yan, Guochen and Chen, Zhaoyu and Xi, Nan and Fang, Yuejian and Shen, Qingni and Wu, Zhonghai and Yuan, Junsong},
  title     = {{dFLMoE}: Decentralized Federated Learning via Mixture of Experts for Medical Data Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10203--10213},
}
Reconstructing Humans with a Biomechanically Accurate Skeleton-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_CVPR,
  author    = {Xia, Yan and Zhou, Xiaowei and Vouga, Etienne and Huang, Qixing and Pavlakos, Georgios},
  title     = {Reconstructing Humans with a Biomechanically Accurate Skeleton},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5355--5365},
}
AdaCM^2: On Understanding Extremely Long-Term Video with Adaptive Cross-Modality Memory Reduction-
[pdf]
[bibtex]@InProceedings{Man_2025_CVPR,
  author    = {Man, Yuanbin and Huang, Ying and Zhang, Chengming and Li, Bingzhe and Niu, Wei and Yin, Miao},
  title     = {{AdaCM$^2$}: On Understanding Extremely Long-Term Video with Adaptive Cross-Modality Memory Reduction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8534--8544},
}
VGGT: Visual Geometry Grounded Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jianyuan and Chen, Minghao and Karaev, Nikita and Vedaldi, Andrea and Rupprecht, Christian and Novotny, David},
  title     = {{VGGT}: Visual Geometry Grounded Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5294--5306},
}
Silent Branding Attack: Trigger-free Data Poisoning Attack on Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2025_CVPR,
  author    = {Jang, Sangwon and Choi, June Suk and Jo, Jaehyeong and Lee, Kimin and Hwang, Sung Ju},
  title     = {Silent Branding Attack: Trigger-free Data Poisoning Attack on Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8203--8212},
}
Visual Consensus Prompting for Co-Salient Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jie and Yu, Nana and Zhang, Zihao and Han, Yahong},
  title     = {Visual Consensus Prompting for Co-Salient Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9591--9600},
}
Quantization without Tears-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Minghao and Yu, Hao and Shao, Jie and Zhou, Junjie and Zhu, Ke and Wu, Jianxin},
  title     = {Quantization without Tears},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4462--4472},
}
PHGC: Procedural Heterogeneous Graph Completion for Natural Language Task Verification in Egocentric Videos-
[pdf]
[bibtex]@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Xun and Huang, Zhiyi and Xu, Xing and Song, Jingkuan and Shen, Fumin and Shen, Heng Tao},
  title     = {{PHGC}: Procedural Heterogeneous Graph Completion for Natural Language Task Verification in Egocentric Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8615--8624},
}
Towards Consistent Multi-Task Learning: Unlocking the Potential of Task-Specific Parameters-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2025_CVPR,
  author    = {Qin, Xiaohan and Wang, Xiaoxing and Yan, Junchi},
  title     = {Towards Consistent Multi-Task Learning: Unlocking the Potential of Task-Specific Parameters},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10067--10076},
}
SceneFactor: Factored Latent 3D Diffusion for Controllable 3D Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bokhovkin_2025_CVPR,
  author    = {Bokhovkin, Aleksey and Meng, Quan and Tulsiani, Shubham and Dai, Angela},
  title     = {{SceneFactor}: Factored Latent {3D} Diffusion for Controllable {3D} Scene Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {628--639},
}
HiFi-Portrait: Zero-shot Identity-preserved Portrait Generation with High-fidelity Multi-face Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Yifang and Zhai, Benxiang and Sun, Yunzhuo and Li, Ming and Li, Yang and Du, Sidan},
  title     = {{HiFi-Portrait}: Zero-shot Identity-preserved Portrait Generation with High-fidelity Multi-face Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5625--5635},
}
FloVD: Optical Flow Meets Video Diffusion Model for Enhanced Camera-Controlled Video Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2025_CVPR,
  author    = {Jin, Wonjoon and Dai, Qi and Luo, Chong and Baek, Seung-Hwan and Cho, Sunghyun},
  title     = {{FloVD}: Optical Flow Meets Video Diffusion Model for Enhanced Camera-Controlled Video Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2040--2049},
}
RAD: Region-Aware Diffusion Models for Image Inpainting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Sora and Suh, Sungho and Lee, Minsik},
  title     = {{RAD}: Region-Aware Diffusion Models for Image Inpainting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2439--2448},
}
Understanding Fine-tuning CLIP for Open-vocabulary Semantic Segmentation in Hyperbolic Space-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2025_CVPR,
  author    = {Peng, Zelin and Xu, Zhengqin and Zeng, Zhilin and Wen, Changsong and Huang, Yu and Yang, Menglin and Tang, Feilong and Shen, Wei},
  title     = {Understanding Fine-tuning {CLIP} for Open-vocabulary Semantic Segmentation in Hyperbolic Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4562--4572},
}
TexGaussian: Generating High-quality PBR Material via Octree-based 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiong_2025_CVPR,
  author    = {Xiong, Bojun and Liu, Jialun and Hu, Jiakui and Wu, Chenming and Wu, Jinbo and Liu, Xing and Zhao, Chen and Ding, Errui and Lian, Zhouhui},
  title     = {{TexGaussian}: Generating High-quality {PBR} Material via Octree-based {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {551--561},
}
Concept Replacer: Replacing Sensitive Concepts in Diffusion Models via Precision Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Lingyun and Xie, Yu and Fu, Yanwei and Chen, Ping},
  title     = {{Concept Replacer}: Replacing Sensitive Concepts in Diffusion Models via Precision Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8172--8181},
}
A Regularization-Guided Equivariant Approach for Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2025_CVPR,
  author    = {Bai, Yulu and Fu, Jiahong and Xie, Qi and Meng, Deyu},
  title     = {A Regularization-Guided Equivariant Approach for Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2300--2310},
}
Deep Fair Multi-View Clustering with Attention KAN-
[pdf]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, HaiMing and Wang, Qianqian and Wang, Boyue and Gao, Quanxue},
  title     = {Deep Fair Multi-View Clustering with Attention {KAN}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5061--5070},
}
LineArt: A Knowledge-guided Training-free High-quality Appearance Transfer for Design Drawing with Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xi and Li, Hongzhen and Fang, Heng and Peng, Yichen and Xie, Haoran and Yang, Xi and Li, Chuntao},
  title     = {{LineArt}: A Knowledge-guided Training-free High-quality Appearance Transfer for Design Drawing with Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2912--2923},
}
VideoICL: Confidence-based Iterative In-context Learning for Out-of-Distribution Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Kangsan and Park, Geon and Lee, Youngwan and Yeo, Woongyeong and Hwang, Sung Ju},
  title     = {{VideoICL}: Confidence-based Iterative In-context Learning for Out-of-Distribution Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3295--3305},
}
Zero-Shot Image Restoration Using Few-Step Guidance of Consistency Models (and Beyond)-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Garber_2025_CVPR,
  author    = {Garber, Tomer and Tirer, Tom},
  title     = {Zero-Shot Image Restoration Using Few-Step Guidance of Consistency Models (and Beyond)},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2398--2407},
}
Similarity-Guided Layer-Adaptive Vision Transformer for UAV Tracking-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xue_2025_CVPR,
  author    = {Xue, Chaocan and Zhong, Bineng and Liang, Qihua and Zheng, Yaozong and Li, Ning and Xue, Yuanliang and Song, Shuxiang},
  title     = {Similarity-Guided Layer-Adaptive Vision Transformer for {UAV} Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6730--6740},
}
LidarGait++: Learning Local Features and Size Awareness from LiDAR Point Clouds for 3D Gait Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Chuanfu and Wang, Rui and Duan, Lixin and Yu, Shiqi},
  title     = {{LidarGait++}: Learning Local Features and Size Awareness from {LiDAR} Point Clouds for {3D} Gait Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6627--6636},
}
Coarse Correspondences Boost Spatial-Temporal Reasoning in Multimodal Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Benlin and Dong, Yuhao and Wang, Yiqin and Ma, Zixian and Tang, Yansong and Tang, Luming and Rao, Yongming and Ma, Wei-Chiu and Krishna, Ranjay},
  title     = {Coarse Correspondences Boost Spatial-Temporal Reasoning in Multimodal Language Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3783--3792},
}
FoundationStereo: Zero-Shot Stereo Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2025_CVPR,
  author    = {Wen, Bowen and Trepte, Matthew and Aribido, Joseph and Kautz, Jan and Gallo, Orazio and Birchfield, Stan},
  title     = {{FoundationStereo}: Zero-Shot Stereo Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5249--5260},
}
UniNet: A Contrastive Learning-guided Unified Framework with Feature Selection for Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Shun and Jiang, Jielin and Xu, Xiaolong},
  title     = {{UniNet}: A Contrastive Learning-guided Unified Framework with Feature Selection for Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9994--10003},
}
MoEdit: On Learning Quantity Perception for Multi-object Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Yanfeng and Chan, Kahou and Sun, Yue and Lam, Chantong and Tong, Tong and Yu, Zitong and Fu, Keren and Liu, Xiaohong and Tan, Tao},
  title     = {{MoEdit}: On Learning Quantity Perception for Multi-object Image Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2683--2693},
}
Seeing More with Less: Human-like Representations in Vision Models-
[pdf]
[supp]
[bibtex]@InProceedings{Gizdov_2025_CVPR,
  author    = {Gizdov, Andrey and Ullman, Shimon and Harari, Daniel},
  title     = {Seeing More with Less: Human-like Representations in Vision Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4408--4417},
}
Modeling Thousands of Human Annotators for Generalizable Text-to-Image Person Re-identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Jiayu and Ding, Changxing and Tan, Wentao and Wang, Junhong and Tao, Jin and Xu, Xiangmin},
  title     = {Modeling Thousands of Human Annotators for Generalizable Text-to-Image Person Re-identification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9220--9230},
}
AeroGen: Enhancing Remote Sensing Object Detection with Diffusion-Driven Data Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Datao and Cao, Xiangyong and Wu, Xuan and Li, Jialin and Yao, Jing and Bai, Xueru and Jiang, Dongsheng and Li, Yin and Meng, Deyu},
  title     = {{AeroGen}: Enhancing Remote Sensing Object Detection with Diffusion-Driven Data Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3614--3624},
}
Tra-MoE: Learning Trajectory Prediction Model from Multiple Domains for Adaptive Policy Conditioning-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jiange and Zhu, Haoyi and Wang, Yating and Wu, Gangshan and He, Tong and Wang, Limin},
  title     = {{Tra-MoE}: Learning Trajectory Prediction Model from Multiple Domains for Adaptive Policy Conditioning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6960--6970},
}
Style Quantization for Data-Efficient GAN Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jian and Lan, Xin and Zhou, Jizhe and Tian, Yuxin and Lv, Jiancheng},
  title     = {Style Quantization for Data-Efficient {GAN} Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7696--7706},
}
Localizing Events in Videos with Multimodal Queries-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Gengyuan and Fok, Mang Ling Ada and Ma, Jialu and Xia, Yan and Cremers, Daniel and Torr, Philip and Tresp, Volker and Gu, Jindong},
  title     = {Localizing Events in Videos with Multimodal Queries},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3339--3351},
}
PhysVLM: Enabling Visual Language Models to Understand Robotic Physical Reachability-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Weijie and Tao, Manli and Zhao, Chaoyang and Guo, Haiyun and Dong, Honghui and Tang, Ming and Wang, Jinqiao},
  title     = {{PhysVLM}: Enabling Visual Language Models to Understand Robotic Physical Reachability},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6940--6949},
}
CleanDIFT: Diffusion Features without Noise-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stracke_2025_CVPR,
  author    = {Stracke, Nick and Baumann, Stefan Andreas and Bauer, Kolja and Fundel, Frank and Ommer, Bj{\"o}rn},
  title     = {{CleanDIFT}: Diffusion Features without Noise},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {117--127},
}
MAD: Memory-Augmented Detection of 3D Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Agro_2025_CVPR,
  author    = {Agro, Ben and Casas, Sergio and Wang, Patrick and Gilles, Thomas and Urtasun, Raquel},
  title     = {{MAD}: Memory-Augmented Detection of {3D} Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1449--1460},
}
Doppelgangers and Adversarial Vulnerability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kamberov_2025_CVPR,
  author    = {Kamberov, George},
  title     = {Doppelgangers and Adversarial Vulnerability},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10244--10254},
}
Precise Event Spotting in Sports Videos: Solving Long-Range Dependency and Class Imbalance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Santra_2025_CVPR,
  author    = {Santra, Sanchayan and Chudasama, Vishal and Wasnik, Pankaj and Balasubramanian, Vineeth N},
  title     = {Precise Event Spotting in Sports Videos: Solving Long-Range Dependency and Class Imbalance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3163--3172},
}
Steady Progress Beats Stagnation: Mutual Aid of Foundation and Conventional Models in Mixed Domain Semi-Supervised Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Qinghe and Zhang, Jian and Li, Zekun and Qi, Lei and Yu, Qian and Shi, Yinghuan},
  title     = {Steady Progress Beats Stagnation: Mutual Aid of Foundation and Conventional Models in Mixed Domain Semi-Supervised Medical Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5175--5185},
}
ARKit LabelMaker: A New Scale for Indoor 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2025_CVPR,
  author    = {Ji, Guangda and Weder, Silvan and Engelmann, Francis and Pollefeys, Marc and Blum, Hermann},
  title     = {{ARKit LabelMaker}: A New Scale for Indoor {3D} Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4398--4407},
}
StreamingT2V: Consistent, Dynamic, and Extendable Long Video Generation from Text-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Henschel_2025_CVPR,
  author    = {Henschel, Roberto and Khachatryan, Levon and Poghosyan, Hayk and Hayrapetyan, Daniil and Tadevosyan, Vahram and Wang, Zhangyang and Navasardyan, Shant and Shi, Humphrey},
  title     = {{StreamingT2V}: Consistent, Dynamic, and Extendable Long Video Generation from Text},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2568--2577},
}
AFL: A Single-Round Analytic Approach for Federated Learning with Pre-trained Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_CVPR,
  author    = {He, Run and Tong, Kai and Fang, Di and Sun, Han and Zeng, Ziqian and Li, Haoran and Chen, Tianyi and Zhuang, Huiping},
  title     = {{AFL}: A Single-Round Analytic Approach for Federated Learning with Pre-trained Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4988--4998},
}
BOLT: Boost Large Vision-Language Model Without Training for Long-form Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Shuming and Zhao, Chen and Xu, Tianqi and Ghanem, Bernard},
  title     = {{BOLT}: Boost Large Vision-Language Model Without Training for Long-form Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3318--3327},
}
Reference-Based 3D-Aware Image Editing with Triplanes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bilecen_2025_CVPR,
  author    = {Bilecen, Bahri Batuhan and Yalin, Yigit and Yu, Ning and Dundar, Aysegul},
  title     = {Reference-Based {3D}-Aware Image Editing with Triplanes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5904--5915},
}
One is Plenty: A Polymorphic Feature Interpreter for Immutable Heterogeneous Collaborative Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_CVPR,
  author    = {Xia, Yuchen and Yuan, Quan and Luo, Guiyang and Fu, Xiaoyuan and Li, Yang and Zhu, Xuanhan and Luo, Tianyou and Chen, Siheng and Li, Jinglin},
  title     = {One is Plenty: A Polymorphic Feature Interpreter for Immutable Heterogeneous Collaborative Perception},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1592--1601},
}
SegAgent: Exploring Pixel Understanding Capabilities in MLLMs by Imitating Human Annotator Trajectories-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Muzhi and Tian, Yuzhuo and Chen, Hao and Zhou, Chunluan and Guo, Qingpei and Liu, Yang and Yang, Ming and Shen, Chunhua},
  title     = {{SegAgent}: Exploring Pixel Understanding Capabilities in {MLLMs} by Imitating Human Annotator Trajectories},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3686--3696},
}
SceneCrafter: Controllable Multi-View Driving Scene Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Zehao and Zou, Yuliang and Jiang, Chiyu Max and Sun, Bo and Casser, Vincent and Huang, Xiukun and Wang, Jiahao and Yang, Zhenpei and Gao, Ruiqi and Guibas, Leonidas and Tan, Mingxing and Anguelov, Dragomir},
  title     = {{SceneCrafter}: Controllable Multi-View Driving Scene Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6812--6822},
}
HeatFormer: A Neural Optimizer for Multiview Human Mesh Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Matsubara_2025_CVPR,
  author    = {Matsubara, Yuto and Nishino, Ko},
  title     = {{HeatFormer}: A Neural Optimizer for Multiview Human Mesh Recovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6415--6424},
}
GPS as a Control Signal for Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Chao and Chen, Ziyang and Holynski, Aleksander and Efros, Alexei A. and Owens, Andrew},
  title     = {{GPS} as a Control Signal for Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2766--2778},
}
CPath-Omni: A Unified Multimodal Foundation Model for Patch and Whole Slide Image Analysis in Computational Pathology-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Yuxuan and Si, Yixuan and Zhu, Chenglu and Gong, Xuan and Zhang, Kai and Chen, Pingyi and Zhang, Ye and Shui, Zhongyi and Lin, Tao and Yang, Lin},
  title     = {{CPath-Omni}: A Unified Multimodal Foundation Model for Patch and Whole Slide Image Analysis in Computational Pathology},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10360--10371},
}
MAGiC-SLAM: Multi-Agent Gaussian Globally Consistent SLAM-
[pdf]
[bibtex]@InProceedings{Yugay_2025_CVPR,
  author    = {Yugay, Vladimir and Gevers, Theo and Oswald, Martin R.},
  title     = {{MAGiC-SLAM}: Multi-Agent {Gaussian} Globally Consistent {SLAM}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6741--6750},
}
NTClick: Achieving Precise Interactive Segmentation With Noise-tolerant Clicks-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Chenyi and Liu, Ting and Qu, Xiaochao and Liu, Luoqi and Zhao, Yao and Wei, Yunchao},
  title     = {{NTClick}: Achieving Precise Interactive Segmentation With Noise-tolerant Clicks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8921--8930},
}
MVGenMaster: Scaling Multi-View Generation from Any Image via 3D Priors Enhanced Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Chenjie and Yu, Chaohui and Liu, Shang and Wang, Fan and Xue, Xiangyang and Fu, Yanwei},
  title     = {{MVGenMaster}: Scaling Multi-View Generation from Any Image via {3D} Priors Enhanced Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6045--6056},
}
HiMoR: Monocular Deformable Gaussian Reconstruction with Hierarchical Motion Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Yiming and Xu, Tianhan and Kikuchi, Yuta},
  title     = {{HiMoR}: Monocular Deformable {Gaussian} Reconstruction with Hierarchical Motion Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {886--895},
}
Enhanced Visual-Semantic Interaction with Tailored Prompts for Pedestrian Attribute Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Junyi and Huang, Yan and Gao, Min and Niu, Yuzhen and Chen, Yuzhong and Wu, Qiang},
  title     = {Enhanced Visual-Semantic Interaction with Tailored Prompts for Pedestrian Attribute Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9570--9579},
}
HSI-GPT: A General-Purpose Large Scene-Motion-Language Model for Human Scene Interaction-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuan and Li, Yali and Li, Xiang and Wang, Shengjin},
  title     = {{HSI-GPT}: A General-Purpose Large Scene-Motion-Language Model for Human Scene Interaction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7147--7157},
}
Vid2Avatar-Pro: Authentic Avatar from Videos in the Wild via Universal Prior-
[pdf]
[bibtex]@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Chen and Li, Junxuan and Kant, Yash and Sheikh, Yaser and Saito, Shunsuke and Cao, Chen},
  title     = {{Vid2Avatar-Pro}: Authentic Avatar from Videos in the Wild via Universal Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5559--5570},
}
RoomPainter: View-Integrated Diffusion for Consistent Indoor Scene Texturing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zhipeng and Yu, Wangbo and Cheng, Xinhua and Zhao, Chengshu and Ge, Yunyang and Guo, Mingyi and Yuan, Li and Tian, Yonghong},
  title     = {{RoomPainter}: View-Integrated Diffusion for Consistent Indoor Scene Texturing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {574--584},
}
IRIS: Inverse Rendering of Indoor Scenes from Low Dynamic Range Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Chih-Hao and Huang, Jia-Bin and Li, Zhengqin and Dong, Zhao and Richardt, Christian and Li, Tuotuo and Zollh{\"o}fer, Michael and Kopf, Johannes and Wang, Shenlong and Kim, Changil},
  title     = {{IRIS}: Inverse Rendering of Indoor Scenes from Low Dynamic Range Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {465--474},
}
RoGSplat: Learning Robust Generalizable Human Gaussian Splatting from Sparse Multi-View Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Junjin and Zhang, Qing and Nie, Yonewei and Zhu, Lei and Zheng, Wei-Shi},
  title     = {{RoGSplat}: Learning Robust Generalizable Human {Gaussian} Splatting from Sparse Multi-View Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5980--5990},
}
EnliveningGS: Active Locomotion of 3DGS-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Siyuan and Shao, Tianjia and Zhou, Kun and Jiang, Chenfanfu and Yang, Yin},
  title     = {{EnliveningGS}: Active Locomotion of {3DGS}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {896--905},
}
Knowledge-Aligned Counterfactual-Enhancement Diffusion Perception for Unsupervised Cross-Domain Visual Emotion Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Wen and Wang, Yong and Duan, Guiduo and Zhang, Dongyang and Hu, Xin and Li, Yuan-Fang and He, Tao},
  title     = {Knowledge-Aligned Counterfactual-Enhancement Diffusion Perception for Unsupervised Cross-Domain Visual Emotion Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3888--3898},
}
LPOSS: Label Propagation Over Patches and Pixels for Open-vocabulary Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Stojnic_2025_CVPR,
  author    = {Stojni{\'c}, Vladan and Kalantidis, Yannis and Matas, Ji{\v{r}}{\'\i} and Tolias, Giorgos},
  title     = {{LPOSS}: Label Propagation Over Patches and Pixels for Open-vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9794--9803},
}
PhysGen3D: Crafting a Miniature Interactive World from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Boyuan and Jiang, Hanxiao and Liu, Shaowei and Gupta, Saurabh and Li, Yunzhu and Zhao, Hao and Wang, Shenlong},
  title     = {{PhysGen3D}: Crafting a Miniature Interactive World from a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6178--6189},
}
Docopilot: Improving Multimodal Models for Document-Level Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2025_CVPR,
  author    = {Duan, Yuchen and Chen, Zhe and Hu, Yusong and Wang, Weiyun and Ye, Shenglong and Shi, Botian and Lu, Lewei and Hou, Qibin and Lu, Tong and Li, Hongsheng and Dai, Jifeng and Wang, Wenhai},
  title     = {Docopilot: Improving Multimodal Models for Document-Level Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4026--4037},
}
Self-supervised ControlNet with Spatio-Temporal Mamba for Real-world Video Super-resolution-
[pdf]
[bibtex]@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Shijun and Xu, Jing and Lu, Lijing and Li, Zhihang and Hu, Kai},
  title     = {Self-supervised {ControlNet} with Spatio-Temporal {Mamba} for Real-world Video Super-resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7385--7395},
}
LATTE-MV: Learning to Anticipate Table Tennis Hits from Monocular Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Etaat_2025_CVPR,
  author    = {Etaat, Daniel and Kalaria, Dvij and Rahmanian, Nima and Sastry, S. Shankar},
  title     = {{LATTE-MV}: Learning to Anticipate Table Tennis Hits from Monocular Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7115--7124},
}
Distilling Spatially-Heterogeneous Distortion Perception for Blind Image Quality Assessment-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Xudong and Nie, Wenjie and Zhang, Yan and Hu, Runze and Li, Ke and Zheng, Xiawu and Cao, Liujuan},
  title     = {Distilling Spatially-Heterogeneous Distortion Perception for Blind Image Quality Assessment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2344--2354},
}
2DMamba: Efficient State Space Model for Image Representation with Applications on Giga-Pixel Whole Slide Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jingwei and Nguyen, Anh Tien and Han, Xi and Trinh, Vincent Quoc-Huy and Qin, Hong and Samaras, Dimitris and Hosseini, Mahdi S.},
  title     = {{2DMamba}: Efficient State Space Model for Image Representation with Applications on Giga-Pixel Whole Slide Image Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3583--3592},
}
Unboxed: Geometrically and Temporally Consistent Video Outpainting-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Zhongrui and Megaro-Boldini, Martina and Sumner, Robert W. and Djelouah, Abdelaziz},
  title     = {Unboxed: Geometrically and Temporally Consistent Video Outpainting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7309--7319},
}
K-Sort Arena: Efficient and Reliable Benchmarking for Generative Models via K-wise Human Preferences-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Zhikai and Liu, Xuewen and Fu, Dongrong Joe and Li, Jianquan and Gu, Qingyi and Keutzer, Kurt and Dong, Zhen},
  title     = {{K-Sort Arena}: Efficient and Reliable Benchmarking for Generative Models via {K-wise} Human Preferences},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9131--9141},
}
Dense-SfM: Structure from Motion with Dense Consistent Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR,
  author    = {Lee, JongMin and Yoo, Sungjoo},
  title     = {{Dense-SfM}: Structure from Motion with Dense Consistent Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6404--6414},
}
Sketchy Bounding-box Supervision for 3D Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Qian and Hui, Le and Xie, Jin and Yang, Jian},
  title     = {Sketchy Bounding-box Supervision for {3D} Instance Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8879--8888},
}
StreetCrafter: Street View Synthesis with Controllable Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Yunzhi and Xu, Zhen and Lin, Haotong and Jin, Haian and Guo, Haoyu and Wang, Yida and Zhan, Kun and Lang, Xianpeng and Bao, Hujun and Zhou, Xiaowei and Peng, Sida},
  title     = {{StreetCrafter}: Street View Synthesis with Controllable Video Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {822--832},
}
Learning Person-Specific Animatable Face Models from In-the-Wild Images via a Shared Base Model-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2025_CVPR,
  author    = {Mao, Yuxiang and Fan, Zhenfeng and Zhang, ZhiJie and Zhang, Zhiheng and Xia, Shihong},
  title     = {Learning Person-Specific Animatable Face Models from In-the-Wild Images via a Shared Base Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5602--5613},
}
TIMotion: Temporal and Interactive Framework for Efficient Human-Human Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yabiao and Wang, Shuo and Zhang, Jiangning and Fan, Ke and Wu, Jiafu and Xue, Zhucun and Liu, Yong},
  title     = {{TIMotion}: Temporal and Interactive Framework for Efficient Human-Human Motion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7169--7178},
}
Hybrid Reciprocal Transformer with Triplet Feature Alignment for Scene Graph Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Jiawei and Zhang, Tiantian and Chen, Kai and Dou, Qi},
  title     = {Hybrid Reciprocal Transformer with Triplet Feature Alignment for Scene Graph Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8953--8963},
}
Gradient Inversion Attacks on Parameter-Efficient Fine-Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Sami_2025_CVPR,
  author    = {Sami, Hasin Us and Sen, Swapneel and Roy-Chowdhury, Amit K. and Krishnamurthy, Srikanth V. and Guler, Basak},
  title     = {Gradient Inversion Attacks on Parameter-Efficient Fine-Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10224--10234},
}
UPME: An Unsupervised Peer Review Framework for Multimodal Large Language Model Evaluation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Qihui and Ning, Munan and Liu, Zheyuan and Huang, Yue and Yang, Shuo and Wang, Yanbo and Ye, Jiayi and Chen, Xiao and Song, Yibing and Yuan, Li},
  title     = {{UPME}: An Unsupervised Peer Review Framework for Multimodal Large Language Model Evaluation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9165--9174},
}
MBQ: Modality-Balanced Quantization for Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Shiyao and Hu, Yingchun and Ning, Xuefei and Liu, Xihui and Hong, Ke and Jia, Xiaotao and Li, Xiuhong and Yan, Yaqi and Ran, Pei and Dai, Guohao and Yan, Shengen and Yang, Huazhong and Wang, Yu},
  title     = {{MBQ}: Modality-Balanced Quantization for Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4167--4177},
}
VideoDPO: Omni-Preference Alignment for Video Diffusion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Runtao and Wu, Haoyu and Zheng, Ziqiang and Wei, Chen and He, Yingqing and Pi, Renjie and Chen, Qifeng},
  title     = {{VideoDPO}: Omni-Preference Alignment for Video Diffusion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8009--8019},
}
Associative Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Yuwei and Ochiai, Hideya and Wu, Zhirong and Lin, Stephen and Kanai, Ryota},
  title     = {Associative Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4518--4527},
}
ChatGarment: Garment Estimation, Generation and Editing via Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bian_2025_CVPR, author = {Bian, Siyuan and Xu, Chenghao and Xiu, Yuliang and Grigorev, Artur and Liu, Zhen and Lu, Cewu and Black, Michael J. and Feng, Yao}, title = {ChatGarment: Garment Estimation, Generation and Editing via Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2924-2934} }
RDD: Robust Feature Detector and Descriptor using Deformable Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Gonglin and Fu, Tianwen and Chen, Haiwei and Teng, Wenbin and Xiao, Hanyuan and Zhao, Yajie}, title = {RDD: Robust Feature Detector and Descriptor using Deformable Transformer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6394-6403} }
Building Vision Models upon Heat Conduction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zhaozhi and Liu, Yue and Tian, Yunjie and Liu, Yunfan and Wang, Yaowei and Ye, Qixiang}, title = {Building Vision Models upon Heat Conduction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9707-9717} }
LT3SD: Latent Trees for 3D Scene Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Meng_2025_CVPR, author = {Meng, Quan and Li, Lei and Nie{\ss}ner, Matthias and Dai, Angela}, title = {LT3SD: Latent Trees for 3D Scene Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {650-660} }
CraftsMan3D: High-fidelity Mesh Generation with 3D Native Diffusion and Interactive Geometry Refiner-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Weiyu and Liu, Jiarui and Yan, Hongyu and Chen, Rui and Liang, Yixun and Chen, Xuelin and Tan, Ping and Long, Xiaoxiao}, title = {CraftsMan3D: High-fidelity Mesh Generation with 3D Native Diffusion and Interactive Geometry Refiner}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5307-5317} }
GIF: Generative Inspiration for Face Recognition at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ebrahimi_2025_CVPR, author = {Ebrahimi, Saeed and Rahimi, Sahar and Dabouei, Ali and Das, Srinjoy and Dawson, Jeremy M. and Nasrabadi, Nasser M.}, title = {GIF: Generative Inspiration for Face Recognition at Scale}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3528-3539} }
SKDream: Controllable Multi-view and 3D Generation with Arbitrary Skeletons-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Yuanyou and Yang, Zongxin and Yang, Yi}, title = {SKDream: Controllable Multi-view and 3D Generation with Arbitrary Skeletons}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {314-325} }
Optimus-2: Multimodal Minecraft Agent with Goal-Observation-Action Conditioned Policy-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zaijing and Xie, Yuquan and Shao, Rui and Chen, Gongwei and Jiang, Dongmei and Nie, Liqiang}, title = {Optimus-2: Multimodal Minecraft Agent with Goal-Observation-Action Conditioned Policy}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9039-9049} }
Classic Video Denoising in a Machine Learning World: Robust, Fast, and Controllable-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2025_CVPR, author = {Jin, Xin and Niklaus, Simon and Zhang, Zhoutong and Xia, Zhihao and Guo, Chunle and Yang, Yuting and Chen, Jiawen and Li, Chongyi}, title = {Classic Video Denoising in a Machine Learning World: Robust, Fast, and Controllable}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2084-2093} }
Population Normalization for Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zhuoyao and Yi, Fan and Gong, Peizhu and He, Caitou and Jin, Cheng and Zhang, Weizhong}, title = {Population Normalization for Federated Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10214-10223} }
RipVIS: Rip Currents Video Instance Segmentation Benchmark for Beach Monitoring and Safety-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dumitriu_2025_CVPR, author = {Dumitriu, Andrei and Tatui, Florin and Miron, Florin and Ralhan, Aakash and Ionescu, Radu Tudor and Timofte, Radu}, title = {RipVIS: Rip Currents Video Instance Segmentation Benchmark for Beach Monitoring and Safety}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3427-3437} }
ESCAPE: Equivariant Shape Completion via Anchor Point Encoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bekci_2025_CVPR, author = {Bekci, Burak and Navab, Nassir and Tombari, Federico and Saleh, Mahdi}, title = {ESCAPE: Equivariant Shape Completion via Anchor Point Encoding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6480-6489} }
Satellite to GroundScape - Large-scale Consistent Ground View Generation from Satellite Views-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Ningli and Qin, Rongjun}, title = {Satellite to GroundScape - Large-scale Consistent Ground View Generation from Satellite Views}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6068-6077} }
Variance-Based Membership Inference Attacks Against Large-Scale Image Captioning Models-
[pdf]
[supp]
[bibtex]@InProceedings{Samira_2025_CVPR, author = {Samira, Daniel and Habler, Edan and Elovici, Yuval and Shabtai, Asaf}, title = {Variance-Based Membership Inference Attacks Against Large-Scale Image Captioning Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9210-9219} }
Learning Dynamic Collaborative Network for Semi-supervised 3D Vessel Segmentation-
[pdf]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Jiao and Chen, Xin and Zhang, Lihe}, title = {Learning Dynamic Collaborative Network for Semi-supervised 3D Vessel Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10445-10454} }
Temporal Alignment-Free Video Matching for Few-shot Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, SuBeen and Moon, WonJun and Seong, Hyun Seok and Heo, Jae-Pil}, title = {Temporal Alignment-Free Video Matching for Few-shot Action Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5412-5421} }
OSLoPrompt: Bridging Low-Supervision Challenges and Open-Set Domain Generalization in CLIP-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{C_2025_CVPR, author = {C, Mohamad Hassan N and Gupta, Divyam and Singha, Mainak and Rongali, Sai Bhargav and Jha, Ankit and Khan, Muhammad Haris and Banerjee, Biplab}, title = {OSLoPrompt: Bridging Low-Supervision Challenges and Open-Set Domain Generalization in CLIP}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10110-10120} }
VLog: Video-Language Models by Generative Retrieval of Narration Vocabulary-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Kevin Qinghong and Shou, Mike Zheng}, title = {VLog: Video-Language Models by Generative Retrieval of Narration Vocabulary}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3218-3228} }
Forensics-Bench: A Comprehensive Forgery Detection Benchmark Suite for Large Vision Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jin and Lv, Chenghui and Li, Xian and Dong, Shichao and Li, Huadong and Yao, Kelu and Li, Chao and Shao, Wenqi and Luo, Ping}, title = {Forensics-Bench: A Comprehensive Forgery Detection Benchmark Suite for Large Vision Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4233-4245} }
Forensic Self-Descriptions Are All You Need for Zero-Shot Detection, Open-Set Source Attribution, and Clustering of AI-generated Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2025_CVPR, author = {Nguyen, Tai D. and Azizpour, Aref and Stamm, Matthew C.}, title = {Forensic Self-Descriptions Are All You Need for Zero-Shot Detection, Open-Set Source Attribution, and Clustering of AI-generated Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3040-3050} }
FlexDrive: Toward Trajectory Flexibility in Driving Scene Gaussian Splatting Reconstruction and Rendering-
[pdf]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Jingqiu and Fan, Lue and Huang, Linjiang and Shi, Xiaoyu and Liu, Si and Zhang, Zhaoxiang and Li, Hongsheng}, title = {FlexDrive: Toward Trajectory Flexibility in Driving Scene Gaussian Splatting Reconstruction and Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1549-1558} }
Taming Video Diffusion Prior with Scene-Grounding Guidance for 3D Gaussian Splatting from Sparse Inputs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_CVPR, author = {Zhong, Yingji and Li, Zhihao and Chen, Dave Zhenyu and Hong, Lanqing and Xu, Dan}, title = {Taming Video Diffusion Prior with Scene-Grounding Guidance for 3D Gaussian Splatting from Sparse Inputs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6133-6143} }
Augmenting Multimodal LLMs with Self-Reflective Tokens for Knowledge-based Visual Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cocchi_2025_CVPR, author = {Cocchi, Federico and Moratelli, Nicholas and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita}, title = {Augmenting Multimodal LLMs with Self-Reflective Tokens for Knowledge-based Visual Question Answering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9199-9209} }
DifIISR: A Diffusion Model with Gradient Guidance for Infrared Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xingyuan and Wang, Zirui and Zou, Yang and Chen, Zhixin and Ma, Jun and Jiang, Zhiying and Ma, Long and Liu, Jinyuan}, title = {DifIISR: A Diffusion Model with Gradient Guidance for Infrared Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7534-7544} }
3D-GSW: 3D Gaussian Splatting for Robust Watermarking-
[pdf]
[supp]
[bibtex]@InProceedings{Jang_2025_CVPR, author = {Jang, Youngdong and Park, Hyunje and Yang, Feng and Ko, Heeju and Choo, Euijin and Kim, Sangpil}, title = {3D-GSW: 3D Gaussian Splatting for Robust Watermarking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5938-5948} }
OpenING: A Comprehensive Benchmark for Judging Open-ended Interleaved Image-Text Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Pengfei and Peng, Xiaopeng and Song, Jiajun and Li, Chuanhao and Xu, Zhaopan and Yang, Yue and Guo, Ziyao and Zhang, Hao and Lin, Yuqi and He, Yefei and Zhao, Lirui and Liu, Shuo and Li, Tianhua and Xie, Yuxuan and Chang, Xiaojun and Qiao, Yu and Shao, Wenqi and Zhang, Kaipeng}, title = {OpenING: A Comprehensive Benchmark for Judging Open-ended Interleaved Image-Text Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {56-66} }
Dual Exposure Stereo for Extended Dynamic Range 3D Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2025_CVPR, author = {Choi, Juhyung and Kim, Jinnyeong and Choi, Seokjun and Lee, Jinwoo and Brucker, Samuel and Bijelic, Mario and Heide, Felix and Baek, Seung-Hwan}, title = {Dual Exposure Stereo for Extended Dynamic Range 3D Imaging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6283-6293} }
PAVE: Patching and Adapting Video Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhuoming and Li, Yiquan and Nguyen, Khoi Duc and Zhong, Yiwu and Li, Yin}, title = {PAVE: Patching and Adapting Video Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3306-3317} }
Generative Image Layer Decomposition with Visual Effects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Jinrui and Liu, Qing and Li, Yijun and Kim, Soo Ye and Pakhomov, Daniil and Ren, Mengwei and Zhang, Jianming and Lin, Zhe and Xie, Cihang and Zhou, Yuyin}, title = {Generative Image Layer Decomposition with Visual Effects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7643-7653} }
AR-Diffusion: Asynchronous Video Generation with Auto-Regressive Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Mingzhen and Wang, Weining and Li, Gen and Liu, Jiawei and Sun, Jiahui and Feng, Wanquan and Lao, Shanshan and Zhou, Siyu and He, Qian and Liu, Jing}, title = {AR-Diffusion: Asynchronous Video Generation with Auto-Regressive Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7364-7373} }
Revisiting Audio-Visual Segmentation with Vision-Centric Transformer-
[pdf]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Shaofei and Ling, Rui and Hui, Tianrui and Li, Hongyu and Zhou, Xu and Zhang, Shifeng and Liu, Si and Hong, Richang and Wang, Meng}, title = {Revisiting Audio-Visual Segmentation with Vision-Centric Transformer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8352-8361} }
HOIGPT: Learning Long-Sequence Hand-Object Interaction with Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Mingzhen and Chu, Fu-Jen and Tekin, Bugra and Liang, Kevin J. and Ma, Haoyu and Wang, Weiyao and Chen, Xingyu and Gleize, Pierre and Xue, Hongfei and Lyu, Siwei and Kitani, Kris and Feiszli, Matt and Tang, Hao}, title = {HOIGPT: Learning Long-Sequence Hand-Object Interaction with Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7136-7146} }
Taxonomy-Aware Evaluation of Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Snaebjarnarson_2025_CVPR, author = {Sn{\ae}bjarnarson, V{\'e}steinn and Du, Kevin and Stoehr, Niklas and Belongie, Serge and Cotterell, Ryan and Lang, Nico and Frank, Stella}, title = {Taxonomy-Aware Evaluation of Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9109-9120} }
Active Event-based Stereo Vision-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jianing and Zhang, Yunjian and Han, Haiqian and Ji, Xiangyang}, title = {Active Event-based Stereo Vision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {971-981} }
SMILE: Infusing Spatial and Motion Semantics in Masked Video Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thoker_2025_CVPR, author = {Thoker, Fida Mohammad and Jiang, Letian and Zhao, Chen and Ghanem, Bernard}, title = {SMILE: Infusing Spatial and Motion Semantics in Masked Video Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8438-8449} }
Video Language Model Pretraining with Spatio-temporal Masking-
[pdf]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Yue and Qi, Zhaobo and Sun, Junshu and Wang, Yaowei and Huang, Qingming and Wang, Shuhui}, title = {Video Language Model Pretraining with Spatio-temporal Masking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8557-8567} }
Synthetic Data is an Elegant GIFT for Continual Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Bin and Shi, Wuxuan and Wang, Jinqiao and Ye, Mang}, title = {Synthetic Data is an Elegant GIFT for Continual Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2813-2823} }
Timestep Embedding Tells: It's Time to Cache for Video Diffusion Model-
[pdf]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Feng and Zhang, Shiwei and Wang, Xiaofeng and Wei, Yujie and Qiu, Haonan and Zhao, Yuzhong and Zhang, Yingya and Ye, Qixiang and Wan, Fang}, title = {Timestep Embedding Tells: It's Time to Cache for Video Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7353-7363} }
Hypergraph Vision Transformers: Images are More than Nodes, More than Edges-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fixelle_2025_CVPR, author = {Fixelle, Joshua}, title = {Hypergraph Vision Transformers: Images are More than Nodes, More than Edges}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9751-9761} }
Binarized Neural Network for Multi-spectral Image Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Hou_2025_CVPR, author = {Hou, Junming and Chen, Xiaoyu and Ran, Ran and Cong, Xiaofeng and Liu, Xinyang and You, Jian Wei and Deng, Liang-Jian}, title = {Binarized Neural Network for Multi-spectral Image Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2236-2245} }
GaussianIP: Identity-Preserving Realistic 3D Human Generation via Human-Centric Diffusion Prior-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Zichen and Yao, Yuan and Cui, Miaomiao and Bo, Liefeng and Yang, Hongyu}, title = {GaussianIP: Identity-Preserving Realistic 3D Human Generation via Human-Centric Diffusion Prior}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {348-358} }
FineVQ: Fine-Grained User Generated Content Video Quality Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duan_2025_CVPR, author = {Duan, Huiyu and Hu, Qiang and Wang, Jiarui and Yang, Liu and Xu, Zitong and Liu, Lu and Min, Xiongkuo and Cai, Chunlei and Ye, Tianxiao and Zhang, Xiaoyun and Zhai, Guangtao}, title = {FineVQ: Fine-Grained User Generated Content Video Quality Assessment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3206-3217} }
Unveiling the Ignorance of MLLMs: Seeing Clearly, Answering Incorrectly-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yexin and Liang, Zhengyang and Wang, Yueze and Wu, Xianfeng and Tang, Feilong and He, Muyang and Li, Jian and Liu, Zheng and Yang, Harry and Lim, Sernam and Zhao, Bo}, title = {Unveiling the Ignorance of MLLMs: Seeing Clearly, Answering Incorrectly}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9087-9097} }
MANTA: Diffusion Mamba for Efficient and Effective Stochastic Long-Term Dense Action Anticipation-
[pdf]
[supp]
[bibtex]@InProceedings{Zatsarynna_2025_CVPR, author = {Zatsarynna, Olga and Bahrami, Emad and Abu Farha, Yazan and Francesca, Gianpiero and Gall, Juergen}, title = {MANTA: Diffusion Mamba for Efficient and Effective Stochastic Long-Term Dense Action Anticipation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3438-3448} }
METASCENES: Towards Automated Replica Creation for Real-world 3D Scans-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Huangyue and Jia, Baoxiong and Chen, Yixin and Yang, Yandan and Li, Puhao and Su, Rongpeng and Li, Jiaxin and Li, Qing and Liang, Wei and Zhu, Song-Chun and Liu, Tengyu and Huang, Siyuan}, title = {METASCENES: Towards Automated Replica Creation for Real-world 3D Scans}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1667-1679} }
Robust Multimodal Survival Prediction with Conditional Latent Differentiation Variational AutoEncoder-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Junjie and Tang, Jiao and Zuo, Yingli and Wan, Peng and Zhang, Daoqiang and Shao, Wei}, title = {Robust Multimodal Survival Prediction with Conditional Latent Differentiation Variational AutoEncoder}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10384-10393} }
Zero-Shot Blind-spot Image Denoising via Implicit Neural Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Quan_2025_CVPR, author = {Quan, Yuhui and Zheng, Tianxiang and Ma, Zhiyuan and Ji, Hui}, title = {Zero-Shot Blind-spot Image Denoising via Implicit Neural Sampling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7502-7512} }
Track4Gen: Teaching Video Diffusion Models to Track Points Improves Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2025_CVPR, author = {Jeong, Hyeonho and Huang, Chun-Hao P. and Ye, Jong Chul and Mitra, Niloy J. and Ceylan, Duygu}, title = {Track4Gen: Teaching Video Diffusion Models to Track Points Improves Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7276-7287} }
InteractAnything: Zero-shot Human Object Interaction Synthesis via LLM Feedback and Object Affordance Parsing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jinlu and Chen, Yixin and Wang, Zan and Yang, Jie and Wang, Yizhou and Huang, Siyuan}, title = {InteractAnything: Zero-shot Human Object Interaction Synthesis via LLM Feedback and Object Affordance Parsing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7015-7025} }
Wonderland: Navigating 3D Scenes from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Hanwen and Cao, Junli and Goel, Vidit and Qian, Guocheng and Korolev, Sergei and Terzopoulos, Demetri and Plataniotis, Konstantinos N. and Tulyakov, Sergey and Ren, Jian}, title = {Wonderland: Navigating 3D Scenes from a Single Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {798-810} }
Towards Satellite Image Road Graph Extraction: A Global-Scale Dataset and A Novel Method-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR, author = {Yin, Pan and Li, Kaiyu and Cao, Xiangyong and Yao, Jing and Liu, Lei and Bai, Xueru and Zhou, Feng and Meng, Deyu}, title = {Towards Satellite Image Road Graph Extraction: A Global-Scale Dataset and A Novel Method}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1527-1537} }
SuperLightNet: Lightweight Parameter Aggregation Network for Multimodal Brain Tumor Segmentation-
[pdf]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Feng and Cao, Jiacheng and Liu, Li and Jiang, Minghua}, title = {SuperLightNet: Lightweight Parameter Aggregation Network for Multimodal Brain Tumor Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5197-5206} }
Self-Supervised Spatial Correspondence Across Modalities-
[pdf]
[bibtex]@InProceedings{Shrivastava_2025_CVPR, author = {Shrivastava, Ayush and Owens, Andrew}, title = {Self-Supervised Spatial Correspondence Across Modalities}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6383-6393} }
MOS-Attack: A Scalable Multi-objective Adversarial Attack Framework-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Ping and Gong, Cheng and Lin, Xi and Liu, Fei and Lu, Zhichao and Zhang, Qingfu and Wang, Zhenkun}, title = {MOS-Attack: A Scalable Multi-objective Adversarial Attack Framework}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5041-5051} }
Motion Modes: What Could Happen Next?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pandey_2025_CVPR, author = {Pandey, Karran and Hold-Geoffroy, Yannick and Gadelha, Matheus and Mitra, Niloy J. and Singh, Karan and Guerrero, Paul}, title = {Motion Modes: What Could Happen Next?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2030-2039} }
Finer-CAM: Spotting the Difference Reveals Finer Details for Visual Explanation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Ziheng and Gu, Jianyang and Chowdhury, Arpita and Mai, Zheda and Carlyn, David and Berger-Wolf, Tanya and Su, Yu and Chao, Wei-Lun}, title = {Finer-CAM: Spotting the Difference Reveals Finer Details for Visual Explanation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9611-9620} }
The Change You Want To Detect: Semantic Change Detection In Earth Observation With Hybrid Data Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Benidir_2025_CVPR, author = {Benidir, Yanis and Gonthier, Nicolas and Mallet, Clement}, title = {The Change You Want To Detect: Semantic Change Detection In Earth Observation With Hybrid Data Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2204-2214} }
Weakly Supervised Semantic Segmentation via Progressive Confidence Region Expansion-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Xiangfeng and Zhang, Pinyi and Huang, Wenxuan and Shen, Yunhang and Chen, Haosheng and Lin, Jingzhong and Li, Wei and He, Gaoqi and Xie, Jiao and Lin, Shaohui}, title = {Weakly Supervised Semantic Segmentation via Progressive Confidence Region Expansion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9829-9838} }
RELOCATE: A Simple Training-Free Baseline for Visual Query Localization Using Region-Based Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Khosla_2025_CVPR, author = {Khosla, Savya and V, Sethuraman T and Schwing, Alexander and Hoiem, Derek}, title = {RELOCATE: A Simple Training-Free Baseline for Visual Query Localization Using Region-Based Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3697-3706} }
HOTFormerLoc: Hierarchical Octree Transformer for Versatile Lidar Place Recognition Across Ground and Aerial Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Griffiths_2025_CVPR, author = {Griffiths, Ethan and Haghighat, Maryam and Denman, Simon and Fookes, Clinton and Ramezani, Milad}, title = {HOTFormerLoc: Hierarchical Octree Transformer for Versatile Lidar Place Recognition Across Ground and Aerial Views}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6648-6658} }
UniK3D: Universal Camera Monocular 3D Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Piccinelli_2025_CVPR, author = {Piccinelli, Luigi and Sakaridis, Christos and Segu, Mattia and Yang, Yung-Hsu and Li, Siyuan and Abbeloos, Wim and Van Gool, Luc}, title = {UniK3D: Universal Camera Monocular 3D Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1028-1039} }
ConMo: Controllable Motion Disentanglement and Recomposition for Zero-Shot Motion Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Jiayi and Yin, Zijin and Hua, Changcheng and Peng, Yuxin and Liang, Kongming and Ma, Zhanyu and Guo, Jun and Liu, Yang}, title = {ConMo: Controllable Motion Disentanglement and Recomposition for Zero-Shot Motion Transfer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7191-7200} }
AG-VPReID: A Challenging Large-Scale Benchmark for Aerial-Ground Video-based Person Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Nguyen_2025_CVPR, author = {Nguyen, Huy and Nguyen, Kien and Pemasiri, Akila and Liu, Feng and Sridharan, Sridha and Fookes, Clinton}, title = {AG-VPReID: A Challenging Large-Scale Benchmark for Aerial-Ground Video-based Person Re-Identification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1241-1251} }
EBS-EKF: Accurate and High Frequency Event-based Star Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Reed_2025_CVPR, author = {Reed, Albert W. and Hashemi, Connor and Melamed, Dennis and Menon, Nitesh and Hirakawa, Keigo and McCloskey, Scott}, title = {EBS-EKF: Accurate and High Frequency Event-based Star Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6510-6519} }
Benchmarking Object Detectors under Real-World Distribution Shifts in Satellite Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Al-Emadi_2025_CVPR, author = {Al-Emadi, Sara A. and Yang, Yin and Ofli, Ferda}, title = {Benchmarking Object Detectors under Real-World Distribution Shifts in Satellite Imagery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8299-8309} }
SAIST: Segment Any Infrared Small Target Model Guided by Contrastive Language-Image Pretraining-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Mingjin and Li, Xiaolong and Gao, Fei and Guo, Jie and Gao, Xinbo and Zhang, Jing}, title = {SAIST: Segment Any Infrared Small Target Model Guided by Contrastive Language-Image Pretraining}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9549-9558} }
Closed-Loop Supervised Fine-Tuning of Tokenized Traffic Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zhejun and Karkus, Peter and Igl, Maximilian and Ding, Wenhao and Chen, Yuxiao and Ivanovic, Boris and Pavone, Marco}, title = {Closed-Loop Supervised Fine-Tuning of Tokenized Traffic Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5422-5432} }
CoLLM: A Large Language Model for Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huynh_2025_CVPR, author = {Huynh, Chuong and Yang, Jinyu and Tawari, Ashish and Shah, Mubarak and Tran, Son and Hamid, Raffay and Chilimbi, Trishul and Shrivastava, Abhinav}, title = {CoLLM: A Large Language Model for Composed Image Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3994-4004} }
GraphMimic: Graph-to-Graphs Generative Modeling from Videos for Policy Learning-
[pdf]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Guangyan and Cui, Te and Wang, Meiling and Yang, Chengcai and Hu, Mengxiao and Lu, Haoyang and Mu, Yao and Peng, Zicai and Zhou, Tianxing and Jiang, Xinran and Yang, Yi and Yue, Yufeng}, title = {GraphMimic: Graph-to-Graphs Generative Modeling from Videos for Policy Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1756-1768} }
MMVU: Measuring Expert-Level Multi-Discipline Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Yilun and Zhang, Haowei and Xie, Lujing and Hu, Tongyan and Gan, Guo and Long, Yitao and Hu, Zhiyuan and Chen, Weiyuan and Li, Chuhan and Xu, Zhijian and Wang, Chengye and Shangguan, Ziyao and Liang, Zhenwen and Liu, Yixin and Zhao, Chen and Cohan, Arman}, title = {MMVU: Measuring Expert-Level Multi-Discipline Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8475-8489} }
EgoLM: Multi-Modal Language Model of Egocentric Motions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2025_CVPR, author = {Hong, Fangzhou and Guzov, Vladimir and Kim, Hyo Jin and Ye, Yuting and Newcombe, Richard and Liu, Ziwei and Ma, Lingni}, title = {EgoLM: Multi-Modal Language Model of Egocentric Motions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5344-5354} }
Long Video Diffusion Generation with Segmented Cross-Attention and Content-Rich Video Data Curation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Xin and Cai, Yuxuan and Wang, Qiuyue and Zhou, Yuan and Huang, Wenhao and Yang, Huan}, title = {Long Video Diffusion Generation with Segmented Cross-Attention and Content-Rich Video Data Curation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3184-3194} }
Learning Heterogeneous Tissues with Mixture of Experts for Gigapixel Whole Slide Images-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Junxian and Chen, Minheng and Ke, Xinyi and Xun, Tianwang and Jiang, Xiaoming and Zhou, Hongyu and Shao, Lizhi and Kong, Youyong}, title = {Learning Heterogeneous Tissues with Mixture of Experts for Gigapixel Whole Slide Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5144-5153} }
Disentangled Pose and Appearance Guidance for Multi-Pose Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2025_CVPR, author = {Xiao, Tengfei and Wu, Yue and Li, Yuelong and Qin, Can and Gong, Maoguo and Miao, Qiguang and Ma, Wenping}, title = {Disentangled Pose and Appearance Guidance for Multi-Pose Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5646-5655} }
Mind the Gap: Confidence Discrepancy Can Guide Federated Semi-Supervised Learning Across Pseudo-Mismatch-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yijie and Shang, Xinyi and Zhang, Yiqun and Lu, Yang and Gong, Chen and Xue, Jing-Hao and Wang, Hanzi}, title = {Mind the Gap: Confidence Discrepancy Can Guide Federated Semi-Supervised Learning Across Pseudo-Mismatch}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10173-10182} }
Electromyography-Informed Facial Expression Reconstruction for Physiological-Based Synthesis and Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Buchner_2025_CVPR, author = {B{\"u}chner, Tim and Anders, Christoph and Guntinas-Lichius, Orlando and Denzler, Joachim}, title = {Electromyography-Informed Facial Expression Reconstruction for Physiological-Based Synthesis and Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {215-227} }
Improving the Transferability of Adversarial Attacks on Face Recognition with Diverse Parameters Augmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Fengfan and Yin, Bangjie and Ling, Hefei and Zhou, Qianyu and Wang, Wenxuan}, title = {Improving the Transferability of Adversarial Attacks on Face Recognition with Diverse Parameters Augmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3516-3527} }
Adapting to Observation Length of Trajectory Prediction via Contrastive Learning-
[pdf]
[bibtex]@InProceedings{Qiu_2025_CVPR, author = {Qiu, Ruiqi and Gong, Jun and Zhang, Xinyu and Luo, Siqi and Zhang, Bowen and Cen, Yi}, title = {Adapting to Observation Length of Trajectory Prediction via Contrastive Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1645-1654} }
Fine-Grained Image-Text Correspondence with Cost Aggregation for Open-Vocabulary Part Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2025_CVPR, author = {Choi, Jiho and Lee, Seonho and Lee, Minhyun and Lee, Seungho and Shim, Hyunjung}, title = {Fine-Grained Image-Text Correspondence with Cost Aggregation for Open-Vocabulary Part Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9782-9793} }
NitroFusion: High-Fidelity Single-Step Diffusion through Dynamic Adversarial Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Dar-Yen and Bandyopadhyay, Hmrishav and Zou, Kai and Song, Yi-Zhe}, title = {NitroFusion: High-Fidelity Single-Step Diffusion through Dynamic Adversarial Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7654-7663} }
CMMLoc: Advancing Text-to-PointCloud Localization with Cauchy-Mixture-Model Based Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Yanlong and Qu, Haoxuan and Liu, Jun and Zhang, Wenxiao and Yang, Xun}, title = {CMMLoc: Advancing Text-to-PointCloud Localization with Cauchy-Mixture-Model Based Framework}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6637-6647} }
RC-AutoCalib: An End-to-End Radar-Camera Automatic Calibration Network-
[pdf]
[supp]
[bibtex]@InProceedings{Luu_2025_CVPR, author = {Luu, Van-Tin and Cai, Yon-Lin and Tran, Vu-Hoang and Chiu, Wei-Chen and Chen, Yi-Ting and Huang, Ching-Chun}, title = {RC-AutoCalib: An End-to-End Radar-Camera Automatic Calibration Network}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6700-6709} }
Argus: A Compact and Versatile Foundation Model for Vision-
[pdf]
[supp]
[bibtex]@InProceedings{Zhuang_2025_CVPR, author = {Zhuang, Weiming and Chen, Chen and Li, Zhizhong and Sajadmanesh, Sina and Li, Jingtao and Huang, Jiabo and Sehwag, Vikash and Sharma, Vivek and Shinozaki, Hirotaka and Garcia, Felan Carlo and Zhan, Yihao and Adachi, Naohiro and Eki, Ryoji and Spranger, Michael and Stone, Peter and Lyu, Lingjuan}, title = {Argus: A Compact and Versatile Foundation Model for Vision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4418-4429} }
Sampling Innovation-Based Adaptive Compressive Sensing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Zhifu and Hu, Tao and Niu, Chaoyang and Wu, Di and Wang, Shu}, title = {Sampling Innovation-Based Adaptive Compressive Sensing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2387-2397} }
MotionBench: Benchmarking and Improving Fine-grained Video Motion Understanding for Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2025_CVPR, author = {Hong, Wenyi and Cheng, Yean and Yang, Zhuoyi and Wang, Weihan and Wang, Lefan and Gu, Xiaotao and Huang, Shiyu and Dong, Yuxiao and Tang, Jie}, title = {MotionBench: Benchmarking and Improving Fine-grained Video Motion Understanding for Vision Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8450-8460} }
ART: Anonymous Region Transformer for Variable Multi-Layer Transparent Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pu_2025_CVPR, author = {Pu, Yifan and Zhao, Yiming and Tang, Zhicong and Yin, Ruihong and Ye, Haoxing and Yuan, Yuhui and Chen, Dong and Bao, Jianmin and Zhang, Sirui and Wang, Yanbin and Liang, Lin and Wang, Lijuan and Li, Ji and Li, Xiu and Lian, Zhouhui and Huang, Gao and Guo, Baining}, title = {ART: Anonymous Region Transformer for Variable Multi-Layer Transparent Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7952-7962} }
ArcPro: Architectural Programs for Structured 3D Abstraction of Sparse Points-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Qirui and Zhang, Runze and Liu, Kangjun and Gong, Minglun and Zhang, Hao and Huang, Hui}, title = {ArcPro: Architectural Programs for Structured 3D Abstraction of Sparse Points}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6563-6572} }
Hardware-Rasterized Ray-Based Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Bulo_2025_CVPR, author = {Bul{\`o}, Samuel Rota and Bartolovic, Nemanja and Porzi, Lorenzo and Kontschieder, Peter}, title = {Hardware-Rasterized Ray-Based Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {485-494} }
FreqDebias: Towards Generalizable Deepfake Detection via Consistency-Driven Frequency Debiasing-
[pdf]
[supp]
[bibtex]@InProceedings{Kashiani_2025_CVPR, author = {Kashiani, Hossein and Talemi, Niloufar Alipour and Afghah, Fatemeh}, title = {FreqDebias: Towards Generalizable Deepfake Detection via Consistency-Driven Frequency Debiasing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8775-8785} }
Multi-subject Open-set Personalization in Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Tsai-Shien and Siarohin, Aliaksandr and Menapace, Willi and Fang, Yuwei and Lee, Kwot Sin and Skorokhodov, Ivan and Aberman, Kfir and Zhu, Jun-Yan and Yang, Ming-Hsuan and Tulyakov, Sergey}, title = {Multi-subject Open-set Personalization in Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6099-6110} }
Wav2Sem: Plug-and-Play Audio Semantic Decoupling for 3D Speech-Driven Facial Animation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Hao and Dai, Ju and Zhao, Xin and Zhou, Feng and Pan, Junjun and Li, Lei}, title = {Wav2Sem: Plug-and-Play Audio Semantic Decoupling for 3D Speech-Driven Facial Animation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {183-192} }
FG^2: Fine-Grained Cross-View Localization by Fine-Grained Feature Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Xia_2025_CVPR, author = {Xia, Zimin and Alahi, Alexandre}, title = {FG{\textasciicircum}2: Fine-Grained Cross-View Localization by Fine-Grained Feature Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6362-6372} }
Distilled Prompt Learning for Incomplete Multimodal Survival Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Yingxue and Zhou, Fengtao and Zhao, Chenyu and Wang, Yihui and Yang, Can and Chen, Hao}, title = {Distilled Prompt Learning for Incomplete Multimodal Survival Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5102-5111} }
Learning Conditional Space-Time Prompt Distributions for Video Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2025_CVPR, author = {Zou, Xiaohan and Ma, Wenchao and Zhao, Shu}, title = {Learning Conditional Space-Time Prompt Distributions for Video Class-Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4862-4873} }
Hyperbolic Safety-Aware Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Poppi_2025_CVPR, author = {Poppi, Tobia and Kasarla, Tejaswi and Mettes, Pascal and Baraldi, Lorenzo and Cucchiara, Rita}, title = {Hyperbolic Safety-Aware Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4222-4232} }
SinGS: Animatable Single-Image Human Gaussian Splats with Kinematic Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Yufan and Chen, Xuanhong and Li, Wen and Jia, Shunran and Wei, Hualiang and Feng, Kairui and Chen, Jialiang and Li, Yuhan and He, Ang and Zhang, Weimin and Ni, Bingbing and Zhang, Wenjun}, title = {SinGS: Animatable Single-Image Human Gaussian Splats with Kinematic Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5571-5580} }
Multi-Modal Synergistic Implicit Image Enhancement for Efficient Optical Flow Estimation-
[pdf]
[bibtex]@InProceedings{Dai_2025_CVPR, author = {Dai, Weichen and Wu, Hexing and Weng, Xiaoyang and Zheng, Yuxin and Ming, Yuhang and Kong, Wanzeng}, title = {Multi-Modal Synergistic Implicit Image Enhancement for Efficient Optical Flow Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2173-2182} }
Generating Multimodal Driving Scenes via Next-Scene Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Yanhao and Zhang, Haoyang and Lin, Tianwei and Huang, Lichao and Luo, Shujie and Wu, Rui and Qiu, Congpei and Ke, Wei and Zhang, Tong}, title = {Generating Multimodal Driving Scenes via Next-Scene Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6844-6853} }
Symmetry Strikes Back: From Single-Image Symmetry Detection to 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xiang and Huang, Zixuan and Thai, Anh and Rehg, James M.}, title = {Symmetry Strikes Back: From Single-Image Symmetry Detection to 3D Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {743-752} }
PosterMaker: Towards High-Quality Product Poster Generation with Accurate Text Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Yifan and Lin, Zihang and Liu, Chuanbin and Zhou, Min and Ge, Tiezheng and Zheng, Bo and Xie, Hongtao}, title = {PosterMaker: Towards High-Quality Product Poster Generation with Accurate Text Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8083-8093} }
Rethinking Personalized Aesthetics Assessment: Employing Physique Aesthetics Assessment as An Exemplification-
[pdf]
[supp]
[bibtex]@InProceedings{Zhong_2025_CVPR, author = {Zhong, Haobin and He, Shuai and Ming, Anlong and Ma, Huadong}, title = {Rethinking Personalized Aesthetics Assessment: Employing Physique Aesthetics Assessment as An Exemplification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2935-2944} }
You See it, You Got it: Learning 3D Creation on Pose-Free Videos at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Baorui and Gao, Huachen and Deng, Haoge and Luo, Zhengxiong and Huang, Tiejun and Tang, Lulu and Wang, Xinlong}, title = {You See it, You Got it: Learning 3D Creation on Pose-Free Videos at Scale}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2016-2029} }
PEACE: Empowering Geologic Map Holistic Understanding with MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Yangyu and Gao, Tianyi and Xu, Haoran and Zhao, Qihao and Song, Yang and Gui, Zhipeng and Lv, Tengchao and Chen, Hao and Cui, Lei and Li, Scarlett and Wei, Furu}, title = {PEACE: Empowering Geologic Map Holistic Understanding with MLLMs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3899-3908} }
ConceptGuard: Continual Personalized Text-to-Image Generation with Forgetting and Confusion Mitigation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Zirun and Jin, Tao}, title = {ConceptGuard: Continual Personalized Text-to-Image Generation with Forgetting and Confusion Mitigation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2945-2954} }
MARVEL-40M+: Multi-Level Visual Elaboration for High-Fidelity Text-to-3D Content Creation-
[pdf]
[supp]
[bibtex]@InProceedings{Sinha_2025_CVPR, author = {Sinha, Sankalp and Khan, Mohammad Sadil and Usama, Muhammad and Sam, Shino and Stricker, Didier and Ali, Sk Aziz and Afzal, Muhammad Zeshan}, title = {MARVEL-40M+: Multi-Level Visual Elaboration for High-Fidelity Text-to-3D Content Creation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8105-8116} }
ERUPT: Efficient Rendering with Unposed Patch Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shugaev_2025_CVPR, author = {Shugaev, Maxim V. and Chen, Vincent and Karrenbach, Maxim and Ashley, Kyle and Kennedy, Bridget and Cuntoor, Naresh P.}, title = {ERUPT: Efficient Rendering with Unposed Patch Transformer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6057-6067} }
Rethinking End-to-End 2D to 3D Scene Segmentation in Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Runsong and Qiu, Shi and Liu, Zhengzhe and Hui, Ka-Hei and Wu, Qianyi and Heng, Pheng-Ann and Fu, Chi-Wing}, title = {Rethinking End-to-End 2D to 3D Scene Segmentation in Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3656-3665} }
Quad-Pixel Image Defocus Deblurring: A New Benchmark and Model-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Hang and Xie, Yin and Peng, Xiaoxiu and Sun, Lihu and Su, Wenkai and Yang, Xiaodong and Liu, Chengming}, title = {Quad-Pixel Image Defocus Deblurring: A New Benchmark and Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5709-5719} }
GEAL: Generalizable 3D Affordance Learning with Cross-Modal Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2025_CVPR, author = {Lu, Dongyue and Kong, Lingdong and Huang, Tianxin and Lee, Gim Hee}, title = {GEAL: Generalizable 3D Affordance Learning with Cross-Modal Consistency}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1680-1690} }
Dynamic Derivation and Elimination: Audio Visual Segmentation with Enhanced Audio Semantics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Chen and Yang, Liying and Li, Peike and Wang, Dadong and Li, Lincheng and Yu, Xin}, title = {Dynamic Derivation and Elimination: Audio Visual Segmentation with Enhanced Audio Semantics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3131-3141} }
ESC: Erasing Space Concept for Knowledge Deletion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Tae-Young and Park, Sundong and Jeon, Minwoo and Hwang, Hyoseok and Park, Gyeong-Moon}, title = {ESC: Erasing Space Concept for Knowledge Deletion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5010-5019} }
Temporal Separation with Entropy Regularization for Knowledge Distillation in Spiking Neural Networks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Kairong and Yu, Chengting and Zhang, Tianqing and Zhao, Xiaochen and Yang, Shu and Wang, Hongwei and Zhang, Qiang and Xu, Qi}, title = {Temporal Separation with Entropy Regularization for Knowledge Distillation in Spiking Neural Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8806-8816} }
Theoretical Insights in Model Inversion Robustness and Conditional Entropy Maximization for Collaborative Inference Systems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_CVPR, author = {Xia, Song and Yu, Yi and Yang, Wenhan and Ding, Meiwen and Chen, Zhuo and Duan, Ling-Yu and Kot, Alex C. and Jiang, Xudong}, title = {Theoretical Insights in Model Inversion Robustness and Conditional Entropy Maximization for Collaborative Inference Systems}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8753-8763} }
Flowing from Words to Pixels: A Noise-Free Framework for Cross-Modality Evolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Qihao and Yin, Xi and Yuille, Alan and Brown, Andrew and Singh, Mannat}, title = {Flowing from Words to Pixels: A Noise-Free Framework for Cross-Modality Evolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2755-2765} }
Interpretable Generative Models through Post-hoc Concept Bottlenecks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kulkarni_2025_CVPR, author = {Kulkarni, Akshay and Yan, Ge and Sun, Chung-En and Oikarinen, Tuomas and Weng, Tsui-Wei}, title = {Interpretable Generative Models through Post-hoc Concept Bottlenecks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8162-8171} }
Watermarking One for All: A Robust Watermarking Scheme Against Partial Image Theft-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Gaozhi and Cao, Silu and Qian, Zhenxing and Zhang, Xinpeng and Li, Sheng and Peng, Wanli}, title = {Watermarking One for All: A Robust Watermarking Scheme Against Partial Image Theft}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8225-8234} }
Minding Fuzzy Regions: A Data-driven Alternating Learning Paradigm for Stable Lesion Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Lexin and Xu, Yunyang and Ma, Xiang and Li, Xuemei and Zhang, Caiming}, title = {Minding Fuzzy Regions: A Data-driven Alternating Learning Paradigm for Stable Lesion Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10425-10434} }
IM-Zero: Instance-level Motion Controllable Video Generation in a Zero-shot Manner-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Yuyang and Chen, Yabo and Ding, Li and Zhang, Xiaopeng and Dai, Wenrui and Zou, Junni and Xiong, Hongkai and Tian, Qi}, title = {IM-Zero: Instance-level Motion Controllable Video Generation in a Zero-shot Manner}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7265-7275} }
Link-based Contrastive Learning for One-Shot Unsupervised Domain Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yue and Bin, Mingyue and Zhang, Yuyang and Wang, Zhongyuan and Han, Zhen and Liang, Chao}, title = {Link-based Contrastive Learning for One-Shot Unsupervised Domain Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4916-4926} }
UniMamba: Unified Spatial-Channel Representation Learning with Group-Efficient Mamba for LiDAR-based 3D Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jin_2025_CVPR, author = {Jin, Xin and Su, Haisheng and Liu, Kai and Ma, Cong and Wu, Wei and HUI, Fei and Yan, Junchi}, title = {UniMamba: Unified Spatial-Channel Representation Learning with Group-Efficient Mamba for LiDAR-based 3D Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1407-1417} }
Rethinking the Adversarial Robustness of Multi-Exit Neural Networks in an Attack-Defense Game-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Keyizhi and Zhang, Chi and Chen, Zhan and Wang, Zhongyuan and Xiao, Chunxia and Liang, Chao}, title = {Rethinking the Adversarial Robustness of Multi-Exit Neural Networks in an Attack-Defense Game}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10265-10274} }
VideoGuide: Improving Video Diffusion Models without Training Through a Teacher's Guide-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Dohun and Kim, Bryan Sangwoo and Park, Geon Yeong and Ye, Jong Chul}, title = {VideoGuide: Improving Video Diffusion Models without Training Through a Teacher's Guide}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2599-2608} }
OmniStereo: Real-time Omnidirectional Depth Estimation with Multiview Fisheye Cameras-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2025_CVPR, author = {Deng, Jiaxi and Wang, Yushen and Meng, Haitao and Hou, Zuoxun and Chang, Yi and Chen, Gang}, title = {OmniStereo: Real-time Omnidirectional Depth Estimation with Multiview Fisheye Cameras}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1003-1012} }
DroneSplat: 3D Gaussian Splatting for Robust 3D Reconstruction from In-the-Wild Drone Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Jiadong and Gao, Yu and Yang, Dianyi and Yan, Liqi and Yue, Yufeng and Yang, Yi}, title = {DroneSplat: 3D Gaussian Splatting for Robust 3D Reconstruction from In-the-Wild Drone Imagery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {833-843} }
SDGOCC: Semantic and Depth-Guided Bird's-Eye View Transformation for 3D Multimodal Occupancy Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2025_CVPR, author = {Duan, ZaiPeng and Dang, ChenXu and Hu, Xuzhong and An, Pei and Ding, Junfeng and Zhan, Jie and Xu, YunBiao and Ma, Jie}, title = {SDGOCC: Semantic and Depth-Guided Bird's-Eye View Transformation for 3D Multimodal Occupancy Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6751-6760} }
Efficient Video Face Enhancement with Enhanced Spatial-Temporal Consistency-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yutong and Teng, Jiajie and Cao, Jiajiong and Li, Yuming and Ma, Chenguang and Xu, Hongteng and Luo, Dixin}, title = {Efficient Video Face Enhancement with Enhanced Spatial-Temporal Consistency}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2183-2193} }
IDProtector: An Adversarial Noise Encoder to Protect Against ID-Preserving Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2025_CVPR, author = {Song, Yiren and Yang, Pei and Ci, Hai and Shou, Mike Zheng}, title = {IDProtector: An Adversarial Noise Encoder to Protect Against ID-Preserving Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3019-3028} }
LoTUS: Large-Scale Machine Unlearning with a Taste of Uncertainty-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Spartalis_2025_CVPR, author = {Spartalis, Christoforos N. and Semertzidis, Theodoros and Gavves, Efstratios and Daras, Petros}, title = {LoTUS: Large-Scale Machine Unlearning with a Taste of Uncertainty}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10046-10055} }
SleeperMark: Towards Robust Watermark against Fine-Tuning Text-to-image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zilan and Guo, Junfeng and Zhu, Jiacheng and Li, Yiming and Huang, Heng and Chen, Muhao and Tu, Zhengzhong}, title = {SleeperMark: Towards Robust Watermark against Fine-Tuning Text-to-image Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8213-8224} }
Illumination Spectrum Estimation for Multispectral Images via Surface Reflectance Modeling and Spatial-Spectral Feature Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Oh_2025_CVPR, author = {Oh, Hyejin and Kim, Woo-Shik and Lee, Sangyoon and Park, YungKyung and Kang, Je-Won}, title = {Illumination Spectrum Estimation for Multispectral Images via Surface Reflectance Modeling and Spatial-Spectral Feature Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2215-2225} }
Towards Explainable and Unprecedented Accuracy in Matching Challenging Finger Crease Patterns-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Zhenyu and Dong, Chengdong and Kumar, Ajay}, title = {Towards Explainable and Unprecedented Accuracy in Matching Challenging Finger Crease Patterns}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6212-6221} }
Neural Hierarchical Decomposition for Single Image Plant Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhihao and Cheng, Zhanglin and Yokoya, Naoto}, title = {Neural Hierarchical Decomposition for Single Image Plant Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {733-742} }
Dual-Agent Optimization framework for Cross-Domain Few-Shot Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zhaoyang and Wang, Yuan and Li, Wangkai and Zhang, Tianzhu and Liu, Xiang}, title = {Dual-Agent Optimization framework for Cross-Domain Few-Shot Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9849-9859} }
SACB-Net: Spatial-awareness Convolutions for Medical Image Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Xinxing and Zhang, Tianyang and Lu, Wenqi and Meng, Qingjie and Frangi, Alejandro F. and Duan, Jinming}, title = {SACB-Net: Spatial-awareness Convolutions for Medical Image Registration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5227-5237} }
Text Embedding is Not All You Need: Attention Control for Text-to-Image Semantic Alignment with Text Self-Attention Maps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Jeeyung and Esmaeili, Erfan and Qiu, Qiang}, title = {Text Embedding is Not All You Need: Attention Control for Text-to-Image Semantic Alignment with Text Self-Attention Maps}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8031-8040} }
DCEvo: Discriminative Cross-Dimensional Evolutionary Learning for Infrared and Visible Image Fusion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Jinyuan and Zhang, Bowei and Mei, Qingyun and Li, Xingyuan and Zou, Yang and Jiang, Zhiying and Ma, Long and Liu, Risheng and Fan, Xin}, title = {DCEvo: Discriminative Cross-Dimensional Evolutionary Learning for Infrared and Visible Image Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2226-2235} }
AIpparel: A Multimodal Foundation Model for Digital Garments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nakayama_2025_CVPR, author = {Nakayama, Kiyohiro and Ackermann, Jan and Kesdogan, Timur Levent and Zheng, Yang and Korosteleva, Maria and Sorkine-Hornung, Olga and Guibas, Leonidas J. and Yang, Guandao and Wetzstein, Gordon}, title = {AIpparel: A Multimodal Foundation Model for Digital Garments}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8138-8149} }
PO3AD: Predicting Point Offsets toward Better 3D Point Cloud Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Jianan and Zhao, Weiguang and Yang, Xi and Cheng, Guangliang and Huang, Kaizhu}, title = {PO3AD: Predicting Point Offsets toward Better 3D Point Cloud Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1353-1362} }
ICT: Image-Object Cross-Level Trusted Intervention for Mitigating Object Hallucination in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Junzhe and Zhang, Tianshu and Huang, Shiyu and Niu, Yuwei and Zhang, Linfeng and Wen, Lijie and Hu, Xuming}, title = {ICT: Image-Object Cross-Level Trusted Intervention for Mitigating Object Hallucination in Large Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4209-4221} }
PreciseCam: Precise Camera Control for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bernal-Berdun_2025_CVPR, author = {Bernal-Berdun, Edurne and Serrano, Ana and Masia, Belen and Gadelha, Matheus and Hold-Geoffroy, Yannick and Sun, Xin and Gutierrez, Diego}, title = {PreciseCam: Precise Camera Control for Text-to-Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2724-2733} }
SET: Spectral Enhancement for Tiny Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Huixin and Wang, Runqi and Li, Yanjing and Yang, Linlin and Lin, Shaohui and Cao, Xianbin and Zhang, Baochang}, title = {SET: Spectral Enhancement for Tiny Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4713-4723} }
Differentiable Inverse Rendering with Interpretable Basis BRDFs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chung_2025_CVPR, author = {Chung, Hoon-Gyu and Choi, Seokjun and Baek, Seung-Hwan}, title = {Differentiable Inverse Rendering with Interpretable Basis BRDFs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {475-484} }
EquiPose: Exploiting Permutation Equivariance for Relative Camera Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yuzhen and Dong, Qiulei}, title = {EquiPose: Exploiting Permutation Equivariance for Relative Camera Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1127-1137} }
Face Forgery Video Detection via Temporal Forgery Cue Unraveling-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Zonghui and Liu, Yingjie and Zhang, Jie and Zheng, Haiyong and Shan, Shiguang}, title = {Face Forgery Video Detection via Temporal Forgery Cue Unraveling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7396-7405} }
Temporally Consistent Object-Centric Learning by Contrasting Slots-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Manasyan_2025_CVPR, author = {Manasyan, Anna and Seitzer, Maximilian and Radovic, Filip and Martius, Georg and Zadaianchuk, Andrii}, title = {Temporally Consistent Object-Centric Learning by Contrasting Slots}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5401-5411} }
MC^2: Multi-concept Guidance for Customized Multi-concept Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Jiaxiu and Zhang, Yabo and Feng, Kailai and Wu, Xiaohe and Li, Wenbo and Pei, Renjing and Li, Fan and Zuo, Wangmeng}, title = {{MC$^2$}: Multi-concept Guidance for Customized Multi-concept Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2802-2812} }
Multi-modal Vision Pre-training for Medical Image Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rui_2025_CVPR, author = {Rui, Shaohao and Chen, Lingzhi and Tang, Zhenyu and Wang, Lilong and Liu, Mianxin and Zhang, Shaoting and Wang, Xiaosong}, title = {Multi-modal Vision Pre-training for Medical Image Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5164-5174} }
STEP: Enhancing Video-LLMs' Compositional Reasoning by Spatio-Temporal Graph-guided Self-Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2025_CVPR, author = {Qiu, Haiyi and Gao, Minghe and Qian, Long and Pan, Kaihang and Yu, Qifan and Li, Juncheng and Wang, Wenjie and Tang, Siliang and Zhuang, Yueting and Chua, Tat-Seng}, title = {STEP: Enhancing Video-LLMs' Compositional Reasoning by Spatio-Temporal Graph-guided Self-Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3284-3294} }
LIM: Large Interpolator Model for Dynamic Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sabathier_2025_CVPR, author = {Sabathier, Remy and Mitra, Niloy J. and Novotny, David}, title = {LIM: Large Interpolator Model for Dynamic Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6154-6164} }
AutoPresent: Designing Structured Visuals from Scratch-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2025_CVPR, author = {Ge, Jiaxin and Wang, Zora Zhiruo and Zhou, Xuhui and Peng, Yi-Hao and Subramanian, Sanjay and Tan, Qinyue and Sap, Maarten and Suhr, Alane and Fried, Daniel and Neubig, Graham and Darrell, Trevor}, title = {AutoPresent: Designing Structured Visuals from Scratch}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2902-2911} }
VisionArena: 230k Real World User-VLM Conversations with Preference Labels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chou_2025_CVPR, author = {Chou, Christopher and Dunlap, Lisa and Mashita, Koki and Mandal, Krishna and Darrell, Trevor and Stoica, Ion and Gonzalez, Joseph E. and Chiang, Wei-Lin}, title = {VisionArena: 230k Real World User-VLM Conversations with Preference Labels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3877-3887} }
FAM Diffusion: Frequency and Attention Modulation for High-Resolution Image Generation with Stable Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Haosen and Bulat, Adrian and Hadji, Isma and Pham, Hai X. and Zhu, Xiatian and Tzimiropoulos, Georgios and Martinez, Brais}, title = {FAM Diffusion: Frequency and Attention Modulation for High-Resolution Image Generation with Stable Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2459-2468} }
MultiGO: Towards Multi-level Geometry Learning for Monocular 3D Textured Human Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Gangjian and Yao, Nanjie and Zhang, Shunsi and Zhao, Hanfeng and Pang, Guoliang and Shu, Jian and Wang, Hao}, title = {MultiGO: Towards Multi-level Geometry Learning for Monocular 3D Textured Human Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {338-347} }
Generative Photomontage-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Sean J. and Kumari, Nupur and Shamir, Ariel and Zhu, Jun-Yan}, title = {Generative Photomontage}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7931-7941} }
Multi-view Reconstruction via SfM-guided Monocular Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Haoyu and Zhu, He and Peng, Sida and Lin, Haotong and Yan, Yunzhi and Xie, Tao and Wang, Wenguan and Zhou, Xiaowei and Bao, Hujun}, title = {Multi-view Reconstruction via SfM-guided Monocular Depth Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5272-5282} }
HuMoCon: Concept Discovery for Human Motion Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2025_CVPR, author = {Fang, Qihang and Tang, Chengcheng and Tekin, Bugra and Ma, Shugao and Yang, Yanchao}, title = {HuMoCon: Concept Discovery for Human Motion Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7179-7190} }
FreeScene: Mixed Graph Diffusion for 3D Scene Synthesis from Free Prompts-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2025_CVPR, author = {Bai, Tongyuan and Bai, Wangyuanfan and Chen, Dong and Wu, Tieru and Li, Manyi and Ma, Rui}, title = {FreeScene: Mixed Graph Diffusion for 3D Scene Synthesis from Free Prompts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5893-5903} }
Rethinking Correspondence-based Category-Level Object Pose Estimation-
[pdf]
[bibtex]@InProceedings{Ren_2025_CVPR, author = {Ren, Huan and Yang, Wenfei and Zhang, Shifeng and Zhang, Tianzhu}, title = {Rethinking Correspondence-based Category-Level Object Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1170-1179} }
Curriculum Direct Preference Optimization for Diffusion and Consistency Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Croitoru_2025_CVPR, author = {Croitoru, Florinel-Alin and Hondru, Vlad and Ionescu, Radu Tudor and Sebe, Nicu and Shah, Mubarak}, title = {Curriculum Direct Preference Optimization for Diffusion and Consistency Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2824-2834} }
Personalized Preference Fine-tuning of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dang_2025_CVPR, author = {Dang, Meihua and Singh, Anikait and Zhou, Linqi and Ermon, Stefano and Song, Jiaming}, title = {Personalized Preference Fine-tuning of Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8020-8030} }
NN-Former: Rethinking Graph Structure in Neural Architecture Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Ruihan and Zhang, Haokui and Wang, Yaowei and Zeng, Wei and Zhang, Shiliang}, title = {NN-Former: Rethinking Graph Structure in Neural Architecture Representation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10004-10014} }
A Unified Image-Dense Annotation Generation Model for Underwater Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Hongkai and Liang, Dingkang and Qi, Zhenghao and Bai, Xiang}, title = {A Unified Image-Dense Annotation Generation Model for Underwater Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {961-970} }
NTR-Gaussian: Nighttime Dynamic Thermal Reconstruction with 4D Gaussian Splatting Based on Thermodynamics-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Kun and Liu, Yuxiang and Cui, Zeyu and Liu, Yu and Zhang, Maojun and Yan, Shen and Wang, Qing}, title = {NTR-Gaussian: Nighttime Dynamic Thermal Reconstruction with 4D Gaussian Splatting Based on Thermodynamics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {691-700} }
FSHNet: Fully Sparse Hybrid Network for 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Shuai and Cui, Mingyue and Li, Boyang and Liang, Quanmin and Hong, Tinghe and Huang, Kai and Shan, Yunxiao and Huang, Kai}, title = {FSHNet: Fully Sparse Hybrid Network for 3D Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8900-8909} }
JTD-UAV: MLLM-Enhanced Joint Tracking and Description Framework for Anti-UAV Systems-
[pdf]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yifan and Zhao, Jian and Fan, Zhaoxin and Zhang, Xin and Wu, Xuecheng and Zhang, Yudian and Jin, Lei and Li, Xinyue and Wang, Gang and Jia, Mengxi and Hu, Ping and Zhu, Zheng and Li, Xuelong}, title = {JTD-UAV: MLLM-Enhanced Joint Tracking and Description Framework for Anti-UAV Systems}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1633-1644} }
HaWoR: World-Space Hand Motion Reconstruction from Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Jinglei and Deng, Jiankang and Ma, Chao and Potamias, Rolandos Alexandros}, title = {HaWoR: World-Space Hand Motion Reconstruction from Egocentric Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1805-1815} }
High Temporal Consistency through Semantic Similarity Propagation in Semi-Supervised Video Semantic Segmentation for Autonomous Flight-
[pdf]
[supp]
[bibtex]@InProceedings{Vincent_2025_CVPR, author = {Vincent, C{\'e}dric and Kim, Taehyoung and Mee{\ss}, Henri}, title = {High Temporal Consistency through Semantic Similarity Propagation in Semi-Supervised Video Semantic Segmentation for Autonomous Flight}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1461-1471} }
Generative Gaussian Splatting for Unbounded 3D City Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Haozhe and Chen, Zhaoxi and Hong, Fangzhou and Liu, Ziwei}, title = {Generative Gaussian Splatting for Unbounded 3D City Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6111-6120} }
GeoMM: On Geodesic Perspective for Multi-modal Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mei_2025_CVPR, author = {Mei, Shibin and Wang, Hang and Ni, Bingbing}, title = {GeoMM: On Geodesic Perspective for Multi-modal Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4776-4786} }
VISCO: Benchmarking Fine-Grained Critique and Correction Towards Self-Improvement in Visual Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Xueqing and Ding, Yuheng and Li, Bingxuan and Lu, Pan and Yin, Da and Chang, Kai-Wei and Peng, Nanyun}, title = {VISCO: Benchmarking Fine-Grained Critique and Correction Towards Self-Improvement in Visual Reasoning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9527-9537} }
Adaptive Dropout: Unleashing Dropout across Layers for Generalizable Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Hang and Huang, Jie and Yu, Wei and Tan, Jiangtong and Zou, Zhen and Zhao, Feng}, title = {Adaptive Dropout: Unleashing Dropout across Layers for Generalizable Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7513-7523} }
Breaking the Memory Barrier of Contrastive Loss via Tile-Based Strategy-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Zesen and Zhang, Hang and Li, Kehan and Leng, Sicong and Hu, Zhiqiang and Wu, Fei and Zhao, Deli and Li, Xin and Bing, Lidong}, title = {Breaking the Memory Barrier of Contrastive Loss via Tile-Based Strategy}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10036-10045} }
Learning Phase Distortion with Selective State Space Models for Video Turbulence Mitigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xingguang and Chimitt, Nicholas and Wang, Xijun and Yuan, Yu and Chan, Stanley H.}, title = {Learning Phase Distortion with Selective State Space Models for Video Turbulence Mitigation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2127-2138} }
RoboPEPP: Vision-Based Robot Pose and Joint Angle Estimation through Embedding Predictive Pre-Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Goswami_2025_CVPR, author = {Goswami, Raktim Gautam and Krishnamurthy, Prashanth and LeCun, Yann and Khorrami, Farshad}, title = {RoboPEPP: Vision-Based Robot Pose and Joint Angle Estimation through Embedding Predictive Pre-Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6930-6939} }
Distraction is All You Need for Multimodal Large Language Model Jailbreaking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Zuopeng and Fan, Jiluan and Yan, Anli and Gao, Erdun and Lin, Xin and Li, Tao and Mo, Kanghua and Dong, Changyu}, title = {Distraction is All You Need for Multimodal Large Language Model Jailbreaking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9467-9476} }
Learning to Normalize on the SPD Manifold under Bures-Wasserstein Geometry-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Rui and Jin, Shaocheng and Chen, Ziheng and Luo, Xiaoqing and Wu, Xiao-Jun}, title = {Learning to Normalize on the SPD Manifold under Bures-Wasserstein Geometry}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8289-8298} }
SAMWISE: Infusing Wisdom in SAM2 for Text-Driven Video Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cuttano_2025_CVPR, author = {Cuttano, Claudia and Trivigno, Gabriele and Rosi, Gabriele and Masone, Carlo and Averta, Giuseppe}, title = {SAMWISE: Infusing Wisdom in SAM2 for Text-Driven Video Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3395-3405} }
BEVDiffuser: Plug-and-Play Diffusion Model for BEV Denoising with Ground-Truth Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Xin and Yaman, Burhaneddin and Cheng, Sheng and Tao, Feng and Mallik, Abhirup and Ren, Liu}, title = {BEVDiffuser: Plug-and-Play Diffusion Model for BEV Denoising with Ground-Truth Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1495-1504} }
FinePhys: Fine-grained Human Action Generation by Explicitly Incorporating Physical Laws for Effective Skeletal Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2025_CVPR, author = {Shao, Dian and Shi, Mingfei and Xu, Shengda and Chen, Haodong and Huang, Yongle and Wang, Binglu}, title = {FinePhys: Fine-grained Human Action Generation by Explicitly Incorporating Physical Laws for Effective Skeletal Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1905-1916} }
SeedVR: Seeding Infinity in Diffusion Transformer Towards Generic Video Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jianyi and Lin, Zhijie and Wei, Meng and Zhao, Yang and Yang, Ceyuan and Loy, Chen Change and Jiang, Lu}, title = {SeedVR: Seeding Infinity in Diffusion Transformer Towards Generic Video Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2161-2172} }
Beyond Generation: A Diffusion-based Low-level Feature Extractor for Detecting AI-generated Images-
[pdf]
[supp]
[bibtex]@InProceedings{Zhong_2025_CVPR, author = {Zhong, Nan and Chen, Haoyu and Xu, Yiran and Qian, Zhenxing and Zhang, Xinpeng}, title = {Beyond Generation: A Diffusion-based Low-level Feature Extractor for Detecting AI-generated Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8258-8268} }
Optical-Flow Guided Prompt Optimization for Coherent Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2025_CVPR, author = {Nam, Hyelin and Kim, Jaemin and Lee, Dohun and Ye, Jong Chul}, title = {Optical-Flow Guided Prompt Optimization for Coherent Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7837-7846} }
Code-as-Monitor: Constraint-aware Visual Programming for Reactive and Proactive Robotic Failure Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Enshen and Su, Qi and Chi, Cheng and Zhang, Zhizheng and Wang, Zhongyuan and Huang, Tiejun and Sheng, Lu and Wang, He}, title = {Code-as-Monitor: Constraint-aware Visual Programming for Reactive and Proactive Robotic Failure Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6919-6929} }
Rethinking Reconstruction and Denoising in the Dark: New Perspective, General Architecture and Beyond-
[pdf]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Tengyu and Ma, Long and Li, Ziye and Wang, Yuetong and Liu, Jinyuan and Xu, Chengpei and Liu, Risheng}, title = {Rethinking Reconstruction and Denoising in the Dark: New Perspective, General Architecture and Beyond}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2323-2332} }
Federated Learning with Domain Shift Eraser-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zheng and Wang, Zihui and Wang, Zheng and Fan, Xiaoliang and Wang, Cheng}, title = {Federated Learning with Domain Shift Eraser}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4978-4987} }
DiTCtrl: Exploring Attention Control in Multi-Modal Diffusion Transformer for Tuning-Free Multi-Prompt Longer Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2025_CVPR, author = {Cai, Minghong and Cun, Xiaodong and Li, Xiaoyu and Liu, Wenze and Zhang, Zhaoyang and Zhang, Yong and Shan, Ying and Yue, Xiangyu}, title = {DiTCtrl: Exploring Attention Control in Multi-Modal Diffusion Transformer for Tuning-Free Multi-Prompt Longer Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7763-7772} }
Link to the Past: Temporal Propagation for Fast 3D Human Reconstruction from Monocular Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Marchellus_2025_CVPR, author = {Marchellus, Matthew and Noor, Nadhira and Park, In Kyu}, title = {Link to the Past: Temporal Propagation for Fast 3D Human Reconstruction from Monocular Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6190-6199} }
Deterministic Certification of Graph Neural Networks against Graph Poisoning Attacks with Arbitrary Perturbations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jiate and Pang, Meng and Dong, Yun and Wang, Binghui}, title = {Deterministic Certification of Graph Neural Networks against Graph Poisoning Attacks with Arbitrary Perturbations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5020-5029} }
A3: Few-shot Prompt Learning of Unlearnable Examples with Cross-Modal Adversarial Feature Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xuan and Gao, Xitong and Liao, Dongping and Qin, Tianrui and Lu, Yu-liang and Xu, Cheng-zhong}, title = {A3: Few-shot Prompt Learning of Unlearnable Examples with Cross-Modal Adversarial Feature Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9507-9516} }
MVPaint: Synchronized Multi-View Diffusion for Painting Anything 3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Wei and Mu, Juncheng and Zeng, Xianfang and Chen, Xin and Pang, Anqi and Zhang, Chi and Wang, Zhibin and Fu, Bin and Yu, Gang and Liu, Ziwei and Pan, Liang}, title = {MVPaint: Synchronized Multi-View Diffusion for Painting Anything 3D}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {585-594} }
ASAP: Advancing Semantic Alignment Promotes Multi-Modal Manipulation Detecting and Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zhenxing and Wang, Yaxiong and Cheng, Lechao and Zhong, Zhun and Guo, Dan and Wang, Meng}, title = {ASAP: Advancing Semantic Alignment Promotes Multi-Modal Manipulation Detecting and Grounding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4005-4014} }
MotiF: Making Text Count in Image Animation with Motion Focal Loss-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Shijie and Azadi, Samaneh and Girdhar, Rohit and Rambhatla, Saketh and Sun, Chen and Yin, Xi}, title = {MotiF: Making Text Count in Image Animation with Motion Focal Loss}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7773-7783} }
Towards Explicit Geometry-Reflectance Collaboration for Generalized LiDAR Segmentation in Adverse Weather-
[pdf]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Longyu and Hu, Ping and Yuan, Shangbo and Zhang, Lu and Liu, Jun and Shen, Hengtao and Zhu, Xiaofeng}, title = {Towards Explicit Geometry-Reflectance Collaboration for Generalized LiDAR Segmentation in Adverse Weather}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {139-149} }
MaskGaussian: Adaptive 3D Gaussian Representation from Probabilistic Masks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yifei and Zhong, Zhihang and Zhan, Yifan and Xu, Sheng and Sun, Xiao}, title = {MaskGaussian: Adaptive 3D Gaussian Representation from Probabilistic Masks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {681-690} }
SoundVista: Novel-View Ambient Sound Synthesis via Visual-Acoustic Binding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Mingfei and Gebru, Israel D. and Ananthabhotla, Ishwarya and Richardt, Christian and Markovic, Dejan and Sandakly, Jake and Krenn, Steven and Keebler, Todd and Shlizerman, Eli and Richard, Alexander}, title = {SoundVista: Novel-View Ambient Sound Synthesis via Visual-Acoustic Binding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8331-8341} }
CLIP Under the Microscope: A Fine-Grained Analysis of Multi-Object Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Abbasi_2025_CVPR, author = {Abbasi, Reza and Nazari, Ali and Sefid, Aminreza and Banayeeanzade, Mohammadali and Rohban, Mohammad Hossein and Baghshah, Mahdieh Soleymani}, title = {CLIP Under the Microscope: A Fine-Grained Analysis of Multi-Object Representation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9308-9317} }
Navigating Image Restoration with VAR's Distribution Alignment Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Siyang and Zheng, Naishan and Huang, Jie and Zhao, Feng},
  title     = {Navigating Image Restoration with {VAR}'s Distribution Alignment Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7559--7569},
}
Dissecting and Mitigating Diffusion Bias via Mechanistic Interpretability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Yingdong and Li, Changming and Wang, Yifan and Zhao, Yongxiang and Pang, Anqi and Yang, Sibei and Yu, Jingyi and Ren, Kan},
  title     = {Dissecting and Mitigating Diffusion Bias via Mechanistic Interpretability},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8192--8202},
}
Graph Neural Network Combining Event Stream and Periodic Aggregation for Low-Latency Event-based Vision-
[pdf]
[bibtex]@InProceedings{Dampfhoffer_2025_CVPR,
  author    = {Dampfhoffer, Manon and Mesquida, Thomas and Joubert, Damien and Dalgaty, Thomas and Vivet, Pascal and Posch, Christoph},
  title     = {Graph Neural Network Combining Event Stream and Periodic Aggregation for Low-Latency Event-based Vision},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6909--6918},
}
ArtFormer: Controllable Generation of Diverse 3D Articulated Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2025_CVPR,
  author    = {Su, Jiayi and Feng, Youhe and Li, Zheng and Song, Jinhua and He, Yangfan and Ren, Botao and Xu, Botian},
  title     = {{ArtFormer}: Controllable Generation of Diverse {3D} Articulated Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1894--1904},
}
Bridging Gait Recognition and Large Language Models Sequence Modeling-
[pdf]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Shaopeng and Wang, Jilong and Hou, Saihui and Liu, Xu and Cao, Chunshui and Wang, Liang and Huang, Yongzhen},
  title     = {Bridging Gait Recognition and Large Language Models Sequence Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3460--3469},
}
DiSRT-In-Bed: Diffusion-Based Sim-to-Real Transfer Framework for In-Bed Human Mesh Recovery-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Jing and Zheng, Ce and Jeni, Laszlo A. and Erickson, Zackory},
  title     = {{DiSRT-In-Bed}: Diffusion-Based Sim-to-Real Transfer Framework for In-Bed Human Mesh Recovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1829--1838},
}
COUNTS: Benchmarking Object Detectors and Multimodal Large Language Models under Distribution Shifts-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Jiansheng and Zhang, Xingxuan and Zou, Hao and Guo, Yige and Xu, Renzhe and Liu, Yilong and Zhu, Chuzhao and He, Yue and Cui, Peng},
  title     = {{COUNTS}: Benchmarking Object Detectors and Multimodal Large Language Models under Distribution Shifts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9186--9198},
}
HOT: Hadamard-based Optimized Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Seonggon and Shin, Juncheol and Woo, Seung-taek and Park, Eunhyeok},
  title     = {{HOT}: {Hadamard}-based Optimized Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4787--4796},
}
TokenFlow: Unified Image Tokenizer for Multimodal Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_CVPR,
  author    = {Qu, Liao and Zhang, Huichao and Liu, Yiheng and Wang, Xu and Jiang, Yi and Gao, Yiming and Ye, Hu and Du, Daniel K. and Yuan, Zehuan and Wu, Xinglong},
  title     = {{TokenFlow}: Unified Image Tokenizer for Multimodal Understanding and Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2545--2555},
}
SnapGen-V: Generating a Five-Second Video within Five Seconds on a Mobile Device-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Yushu and Zhang, Zhixing and Li, Yanyu and Xu, Yanwu and Kag, Anil and Sui, Yang and Coskun, Huseyin and Ma, Ke and Lebedev, Aleksei and Hu, Ju and Metaxas, Dimitris N. and Wang, Yanzhi and Tulyakov, Sergey and Ren, Jian},
  title     = {{SnapGen-V}: Generating a Five-Second Video within Five Seconds on a Mobile Device},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2479--2490},
}
Adapting Dense Matching for Homography Estimation with Grid-based Acceleration-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Kaining and Deng, Yuxin and Ma, Jiayi and Favaro, Paolo},
  title     = {Adapting Dense Matching for Homography Estimation with Grid-based Acceleration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6294--6303},
}
CoSDH: Communication-Efficient Collaborative Perception via Supply-Demand Awareness and Intermediate-Late Hybridization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Junhao and Zhang, Yanan and Cai, Zhi and Huang, Di},
  title     = {{CoSDH}: Communication-Efficient Collaborative Perception via Supply-Demand Awareness and Intermediate-Late Hybridization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6834--6843},
}
Stereo Anywhere: Robust Zero-Shot Deep Stereo Matching Even Where Either Stereo or Mono Fail-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bartolomei_2025_CVPR,
  author    = {Bartolomei, Luca and Tosi, Fabio and Poggi, Matteo and Mattoccia, Stefano},
  title     = {{Stereo Anywhere}: Robust Zero-Shot Deep Stereo Matching Even Where Either Stereo or Mono Fail},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1013--1027},
}
Order-Robust Class Incremental Learning: Graph-Driven Dynamic Similarity Grouping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Guannan and Li, Yujie and Wang, Xiangkun and Zhang, Junbo and Li, Tianrui and Yang, Xin},
  title     = {Order-Robust Class Incremental Learning: Graph-Driven Dynamic Similarity Grouping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4894--4904},
}
Where the Devil Hides: Deepfake Detectors Can No Longer Be Trusted-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Shuaiwei and Dong, Junyu and Li, Yuezun},
  title     = {Where the Devil Hides: Deepfake Detectors Can No Longer Be Trusted},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8764--8774},
}
CaMuViD: Calibration-Free Multi-View Detection-
[pdf]
[bibtex]@InProceedings{Daryani_2025_CVPR,
  author    = {Daryani, Amir Etefaghi and Bhutta, M. Usman Maqbool and Hernandez, Byron and Medeiros, Henry},
  title     = {{CaMuViD}: Calibration-Free Multi-View Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1220--1229},
}
Prosody-Enhanced Acoustic Pre-training and Acoustic-Disentangled Prosody Adapting for Movie Dubbing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhedong and Li, Liang and Yan, Chenggang and Liu, Chunshan and van den Hengel, Anton and Qi, Yuankai},
  title     = {Prosody-Enhanced Acoustic Pre-training and Acoustic-Disentangled Prosody Adapting for Movie Dubbing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {172--182},
}
HVI: A New Color Space for Low-light Image Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Qingsen and Feng, Yixu and Zhang, Cheng and Pang, Guansong and Shi, Kangbiao and Wu, Peng and Dong, Wei and Sun, Jinqiu and Zhang, Yanning},
  title     = {{HVI}: A New Color Space for Low-light Image Enhancement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5678--5687},
}
DualPM: Dual Posed-Canonical Point Maps for 3D Shape and Pose Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kaye_2025_CVPR,
  author    = {Kaye, Ben and Jakab, Tomas and Wu, Shangzhe and Ruprecht, Christian and Vedaldi, Andrea},
  title     = {{DualPM}: Dual Posed-Canonical Point Maps for {3D} Shape and Pose Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6425--6435},
}
One Diffusion to Generate Them All-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Le_2025_CVPR,
  author    = {Le, Duong H. and Pham, Tuan and Lee, Sangho and Clark, Christopher and Kembhavi, Aniruddha and Mandt, Stephan and Krishna, Ranjay and Lu, Jiasen},
  title     = {One Diffusion to Generate Them All},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2671--2682},
}
CoSER: Towards Consistent Dense Multiview Text-to-Image Generator for 3D Creation-
[pdf]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Bonan and Zhang, Zicheng and Yang, Xingyi and Wang, Xinchao},
  title     = {{CoSER}: Towards Consistent Dense Multiview Text-to-Image Generator for {3D} Creation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2880--2890},
}
UNEM: UNrolled Generalized EM for Transductive Few-Shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Long and Shakeri, Fereshteh and Sadraoui, Aymen and Kaaniche, Mounir and Pesquet, Jean-Christophe and Ben Ayed, Ismail},
  title     = {{UNEM}: {UNrolled} Generalized {EM} for Transductive Few-Shot Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9665--9675},
}
G3Flow: Generative 3D Semantic Flow for Pose-aware and Generalizable Object Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Tianxing and Mu, Yao and Liang, Zhixuan and Chen, Zanxin and Peng, Shijia and Chen, Qiangyu and Xu, Mingkun and Hu, Ruizhen and Zhang, Hongyuan and Li, Xuelong and Luo, Ping},
  title     = {{G3Flow}: Generative {3D} Semantic Flow for Pose-aware and Generalizable Object Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1735--1744},
}
Explaining Domain Shifts in Language: Concept Erasing for Interpretable Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2025_CVPR,
  author    = {Zeng, Zequn and Su, Yudi and Sun, Jianqiao and Wen, Tiansheng and Zhang, Hao and Wang, Zhengjue and Chen, Bo and Liu, Hongwei and Ma, Jiawei},
  title     = {Explaining Domain Shifts in Language: Concept Erasing for Interpretable Image Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9517--9526},
}
Textured Gaussians for Enhanced 3D Scene Appearance Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chao_2025_CVPR,
  author    = {Chao, Brian and Tseng, Hung-Yu and Porzi, Lorenzo and Gao, Chen and Li, Tuotuo and Li, Qinbo and Saraf, Ayush and Huang, Jia-Bin and Kopf, Johannes and Wetzstein, Gordon and Kim, Changil},
  title     = {Textured {Gaussians} for Enhanced {3D} Scene Appearance Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8964--8974},
}
NeighborRetr: Balancing Hub Centrality in Cross-Modal Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Zengrong and Wang, Zheng and Qian, Tianwen and Mu, Pan and Chan, Sixian and Bai, Cong},
  title     = {{NeighborRetr}: Balancing Hub Centrality in Cross-Modal Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9263--9273},
}
Global-Local Tree Search in VLMs for 3D Indoor Scene Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Wei and Qi, Mengshi and Ma, Huadong},
  title     = {Global-Local Tree Search in {VLMs} for {3D} Indoor Scene Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8975--8984},
}
GFlowVLM: Enhancing Multi-step Reasoning in Vision-Language Models with Generative Flow Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2025_CVPR,
  author    = {Kang, Haoqiang and Sachdeva, Enna and Gupta, Piyush and Bae, Sangjae and Lee, Kwonjoon},
  title     = {{GFlowVLM}: Enhancing Multi-step Reasoning in Vision-Language Models with Generative Flow Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3815--3825},
}
MAP: Unleashing Hybrid Mamba-Transformer Vision Backbone's Potential with Masked Autoregressive Pretraining-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yunze and Yi, Li},
  title     = {{MAP}: Unleashing Hybrid {Mamba}-Transformer Vision Backbone's Potential with Masked Autoregressive Pretraining},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9676--9685},
}
Segment Any-Quality Images with Generative Latent Space Enhancement-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Guangqian and Guo, Yong and Yu, Xuehui and Li, Wenbo and Wang, Yaoxing and Gao, Shan},
  title     = {Segment Any-Quality Images with Generative Latent Space Enhancement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2366--2376},
}
CityWalker: Learning Embodied Urban Navigation from Web-Scale Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xinhao and Li, Jintong and Jiang, Yicheng and Sujay, Niranjan and Yang, Zhicheng and Zhang, Juexiao and Abanes, John and Zhang, Jing and Feng, Chen},
  title     = {{CityWalker}: Learning Embodied Urban Navigation from Web-Scale Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6875--6885},
}
Learning Visual Composition through Improved Semantic Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stone_2025_CVPR,
  author    = {Stone, Austin and Soltau, Hagen and Geirhos, Robert and Yi, Xi and Xia, Ye and Cao, Bingyi and Chen, Kaifeng and Ogale, Abhijit and Shlens, Jonathon},
  title     = {Learning Visual Composition through Improved Semantic Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3740--3750},
}
JanusFlow: Harmonizing Autoregression and Rectified Flow for Unified Multimodal Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Yiyang and Liu, Xingchao and Chen, Xiaokang and Liu, Wen and Wu, Chengyue and Wu, Zhiyu and Pan, Zizheng and Xie, Zhenda and Zhang, Haowei and Yu, Xingkai and Zhao, Liang and Wang, Yisong and Liu, Jiaying and Ruan, Chong},
  title     = {{JanusFlow}: Harmonizing Autoregression and Rectified Flow for Unified Multimodal Understanding and Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7739--7751},
}
Visual Prompting for One-shot Controllable Video Editing without Inversion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhengbo and Zhou, Yuxi and Peng, Duo and Lim, Joo-Hwee and Tu, Zhigang and Soh, De Wen and Foo, Lin Geng},
  title     = {Visual Prompting for One-shot Controllable Video Editing without Inversion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7784--7794},
}
AVQACL: A Novel Benchmark for Audio-Visual Question Answering Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Kaixuan and Li, Xinde and Li, Xinling and Hu, Chuanfei and Wu, Guoliang},
  title     = {{AVQACL}: A Novel Benchmark for Audio-Visual Question Answering Continual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3252--3261},
}
Flash-Split: 2D Reflection Removal with Flash Cues and Latent Diffusion Separation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Tianfu and Xie, Mingyang and Cai, Haoming and Shah, Sachin and Metzler, Christopher A.},
  title     = {{Flash-Split}: {2D} Reflection Removal with Flash Cues and Latent Diffusion Separation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5688--5698},
}
Attention IoU: Examining Biases in CelebA using Attention Maps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Serianni_2025_CVPR,
  author    = {Serianni, Aaron and Zhu, Tyler and Russakovsky, Olga and Ramaswamy, Vikram V.},
  title     = {Attention {IoU}: Examining Biases in {CelebA} using Attention Maps},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4386--4397},
}
HEIE: MLLM-Based Hierarchical Explainable AIGC Image Implausibility Evaluator-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Fan and Zhen, Ru and Wang, Jianing and Zhang, Yanhao and Chen, Haoxiang and Lu, Haonan and Zhao, Sicheng and Ding, Guiguang},
  title     = {{HEIE}: {MLLM}-Based Hierarchical Explainable {AIGC} Image Implausibility Evaluator},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3856--3866},
}
Segment Any Motion in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Nan and Zheng, Wenzhao and Xu, Chenfeng and Keutzer, Kurt and Zhang, Shanghang and Kanazawa, Angjoo and Wang, Qianqian},
  title     = {Segment Any Motion in Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3406--3416},
}
Task-aware Cross-modal Feature Refinement Transformer with Large Language Models for Visual Grounding-
[pdf]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Wenbo and Xu, Zhen and Xu, Ruotao and Wu, Si and Wong, Hau-San},
  title     = {Task-aware Cross-modal Feature Refinement Transformer with Large Language Models for Visual Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3931--3941},
}
PatchDEMUX: A Certifiably Robust Framework for Multi-label Classifiers Against Adversarial Patches-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jacob_2025_CVPR,
  author    = {Jacob, Dennis and Xiang, Chong and Mittal, Prateek},
  title     = {{PatchDEMUX}: A Certifiably Robust Framework for Multi-label Classifiers Against Adversarial Patches},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9944--9953},
}
EgoTextVQA: Towards Egocentric Scene-Text Aware Video Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Sheng and Xiao, Junbin and Li, Qingyun and Li, Yicong and Yang, Xun and Guo, Dan and Wang, Meng and Chua, Tat-Seng and Yao, Angela},
  title     = {{EgoTextVQA}: Towards Egocentric Scene-Text Aware Video Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3363--3373},
}
Token Cropr: Faster ViTs for Quite a Few Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bergner_2025_CVPR,
  author    = {Bergner, Benjamin and Lippert, Christoph and Mahendran, Aravindh},
  title     = {{Token Cropr}: Faster {ViTs} for Quite a Few Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9740--9750},
}
STCOcc: Sparse Spatial-Temporal Cascade Renovation for 3D Occupancy and Scene Flow Prediction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liao_2025_CVPR,
  author    = {Liao, Zhimin and Wei, Ping and Chen, Shuaijia and Wang, Haoxuan and Ren, Ziyang},
  title     = {{STCOcc}: Sparse Spatial-Temporal Cascade Renovation for {3D} Occupancy and Scene Flow Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1516--1526},
}
Resilient Sensor Fusion Under Adverse Sensor Failures via Multi-Modal Expert Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_CVPR,
  author    = {Park, Konyul and Kim, Yecheol and Kim, Daehun and Choi, Jun Won},
  title     = {Resilient Sensor Fusion Under Adverse Sensor Failures via Multi-Modal Expert Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6720--6729},
}
MambaVO: Deep Visual Odometry Based on Sequential Matching Refinement and Training Smoothing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shuo and Li, Wanting and Wang, Yongcai and Fan, Zhaoxin and Huang, Zhe and Cai, Xudong and Zhao, Jian and Li, Deying},
  title     = {{MambaVO}: Deep Visual Odometry Based on Sequential Matching Refinement and Training Smoothing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1252--1262},
}
IndoorGS: Geometric Cues Guided Gaussian Splatting for Indoor Scene Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Ruan_2025_CVPR,
  author    = {Ruan, Cong and Wang, Yuesong and Guan, Tao and Zhang, Bin and Ju, Lili},
  title     = {{IndoorGS}: Geometric Cues Guided {Gaussian} Splatting for Indoor Scene Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {844--853},
}
Point-Cache: Test-time Dynamic and Hierarchical Cache for Robust and Generalizable Point Cloud Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Hongyu and Ke, Qiuhong and Cheng, Ming and Wang, Yongcai and Li, Deying and Gou, Chenhui and Cai, Jianfei},
  title     = {{Point-Cache}: Test-time Dynamic and Hierarchical Cache for Robust and Generalizable Point Cloud Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1263--1275},
}
Stop Walking in Circles! Bailing Out Early in Projected Gradient Descent-
[pdf]
[arXiv]
[bibtex]@InProceedings{Doldo_2025_CVPR,
  author    = {Doldo, Philip and Everett, Derek and Khanna, Amol and Nguyen, Andre T. and Raff, Edward},
  title     = {Stop Walking in Circles! {Bailing} Out Early in Projected Gradient Descent},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6373--6382},
}
MoManipVLA: Transferring Vision-language-action Models for General Mobile Manipulation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Zhenyu and Zhou, Yuheng and Xu, Xiuwei and Wang, Ziwei and Yan, Haibin},
  title     = {{MoManipVLA}: Transferring Vision-language-action Models for General Mobile Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1714--1723},
}
Stable-SCore: A Stable Registration-based Framework for 3D Shape Correspondence-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Haolin and Zhan, Xiaohang and Yan, Zizheng and Luo, Zhongjin and Wen, Yuxin and Han, Xiaoguang},
  title     = {{Stable-SCore}: A Stable Registration-based Framework for {3D} Shape Correspondence},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {917--928},
}
Beyond Single-Modal Boundary: Cross-Modal Anomaly Detection through Visual Prototype and Harmonization-
[pdf]
[bibtex]@InProceedings{Mao_2025_CVPR,
  author    = {Mao, Kai and Wei, Ping and Lian, Yiyang and Wang, Yangyang and Zheng, Nanning},
  title     = {Beyond Single-Modal Boundary: Cross-Modal Anomaly Detection through Visual Prototype and Harmonization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9964--9973},
}
Align-KD: Distilling Cross-Modal Alignment Knowledge for Mobile Vision-Language Large Model Enhancement-
[pdf]
[bibtex]@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Qianhan and Li, Wenshuo and Lin, Tong and Chen, Xinghao},
  title     = {{Align-KD}: Distilling Cross-Modal Alignment Knowledge for Mobile Vision-Language Large Model Enhancement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4178--4188},
}
Pose Priors from Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Subramanian_2025_CVPR,
  author    = {Subramanian, Sanjay and Ng, Evonne and M{\"u}ller, Lea and Klein, Dan and Ginosar, Shiry and Darrell, Trevor},
  title     = {Pose Priors from Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7125--7135},
}
LogoSP: Local-global Grouping of Superpoints for Unsupervised Semantic Segmentation of 3D Point Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zihui and Dai, Weisheng and Wen, Hongtao and Yang, Bo},
  title     = {{LogoSP}: Local-global Grouping of Superpoints for Unsupervised Semantic Segmentation of {3D} Point Clouds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1374--1384},
}
Exploring Intrinsic Normal Prototypes within a Single Image for Universal Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Wei and Cao, Yunkang and Yao, Haiming and Zhang, Xiaotian and Lou, Jianan and Cheng, Yuqi and Shen, Weiming and Yu, Wenyong},
  title     = {Exploring Intrinsic Normal Prototypes within a Single Image for Universal Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9974--9983},
}
Augmenting Perceptual Super-Resolution via Image Quality Predictors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Fengjia and Rangrej, Samrudhdhi B. and Aumentado-Armstrong, Tristan and Fazly, Afsaneh and Levinshtein, Alex},
  title     = {Augmenting Perceptual Super-Resolution via Image Quality Predictors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2311--2322},
}
TurboFill: Adapting Few-step Text-to-image Model for Fast Image Inpainting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Liangbin and Pakhomov, Daniil and Wang, Zhonghao and Wu, Zongze and Chen, Ziyan and Zhou, Yuqian and Zheng, Haitian and Zhang, Zhifei and Lin, Zhe and Zhou, Jiantao and Dong, Chao},
  title     = {{TurboFill}: Adapting Few-step Text-to-image Model for Fast Image Inpainting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7613--7622},
}
Stochastic Human Motion Prediction with Memory of Action Transition and Action Characteristic-
[pdf]
[bibtex]@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Jianwei and Yang, Hong and Chen, Tengyue and Hu, Jian-Fang},
  title     = {Stochastic Human Motion Prediction with Memory of Action Transition and Action Characteristic},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1883--1893},
}
Perception Tokens Enhance Visual Reasoning in Multimodal Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bigverdi_2025_CVPR,
  author    = {Bigverdi, Mahtab and Luo, Zelun and Hsieh, Cheng-Yu and Shen, Ethan and Chen, Dongping and Shapiro, Linda G. and Krishna, Ranjay},
  title     = {Perception Tokens Enhance Visual Reasoning in Multimodal Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3836--3845},
}
X-Dyna: Expressive Dynamic Human Image Animation-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2025_CVPR,
  author    = {Chang, Di and Xu, Hongyi and Xie, You and Gao, Yipeng and Kuang, Zhengfei and Cai, Shengqu and Zhang, Chenxu and Song, Guoxian and Wang, Chao and Shi, Yichun and Chen, Zeyuan and Zhou, Shijie and Luo, Linjie and Wetzstein, Gordon and Soleymani, Mohammad},
  title     = {{X-Dyna}: Expressive Dynamic Human Image Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5499--5509},
}
Towards Effective and Sparse Adversarial Attack on Spiking Neural Networks via Breaking Invisible Surrogate Gradients-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lun_2025_CVPR,
  author    = {Lun, Li and Feng, Kunyu and Ni, Qinglong and Liang, Ling and Wang, Yuan and Li, Ying and Yu, Dunshan and Cui, Xiaoxin},
  title     = {Towards Effective and Sparse Adversarial Attack on Spiking Neural Networks via Breaking Invisible Surrogate Gradients},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3540--3551},
}
Towards Understanding and Quantifying Uncertainty for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Franchi_2025_CVPR,
  author    = {Franchi, Gianni and Belkhir, Nacim and Trong, Dat Nguyen and Xia, Guoxuan and Pilzer, Andrea},
  title     = {Towards Understanding and Quantifying Uncertainty for Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8062--8072},
}
LLaVA-ST: A Multimodal Large Language Model for Fine-Grained Spatial-Temporal Understanding-
[pdf]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Hongyu and Chen, Jinyu and Wei, Ziyu and Huang, Shaofei and Hui, Tianrui and Gao, Jialin and Wei, Xiaoming and Liu, Si},
  title     = {{LLaVA-ST}: A Multimodal Large Language Model for Fine-Grained Spatial-Temporal Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8592--8603},
}
Insight-V: Exploring Long-Chain Visual Reasoning with Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Yuhao and Liu, Zuyan and Sun, Hai-Long and Yang, Jingkang and Hu, Winston and Rao, Yongming and Liu, Ziwei},
  title     = {{Insight-V}: Exploring Long-Chain Visual Reasoning with Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9062--9072},
}
MaIR: A Locality- and Continuity-Preserving Mamba for Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Boyun and Zhao, Haiyu and Wang, Wenxin and Hu, Peng and Gou, Yuanbiao and Peng, Xi},
  title     = {{MaIR}: A Locality- and Continuity-Preserving {Mamba} for Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7491--7501},
}
RSAR: Restricted State Angle Resolver and Rotated SAR Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xin and Yang, Xue and Li, Yuxuan and Yang, Jian and Cheng, Ming-Ming and Li, Xiang}, title = {RSAR: Restricted State Angle Resolver and Rotated SAR Benchmark}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7416-7426} }
Continuous Space-Time Video Resampling with Invertible Motion Steganography-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yuantong and Chen, Zhenzhong}, title = {Continuous Space-Time Video Resampling with Invertible Motion Steganography}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2116-2126} }
ProtoDepth: Unsupervised Continual Depth Completion with Prototypes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rim_2025_CVPR, author = {Rim, Patrick and Park, Hyoungseob and Gangopadhyay, S. and Zeng, Ziyao and Chung, Younjoon and Wong, Alex}, title = {ProtoDepth: Unsupervised Continual Depth Completion with Prototypes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6304-6316} }
ParaHome: Parameterizing Everyday Home Activities Towards 3D Generative Modeling of Human-Object Interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Jeonghwan and Kim, Jisoo and Na, Jeonghyeon and Joo, Hanbyul}, title = {ParaHome: Parameterizing Everyday Home Activities Towards 3D Generative Modeling of Human-Object Interactions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1816-1828} }
Adapting to the Unknown: Training-Free Audio-Visual Event Perception with Dynamic Thresholds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shaar_2025_CVPR, author = {Shaar, Eitan and Shaulov, Ariel and Chechik, Gal and Wolf, Lior}, title = {Adapting to the Unknown: Training-Free Audio-Visual Event Perception with Dynamic Thresholds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3142-3151} }
OpenHumanVid: A Large-Scale High-Quality Dataset for Enhancing Human-Centric Video Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Hui and Xu, Mingwang and Zhan, Yun and Mu, Shan and Li, Jiaye and Cheng, Kaihui and Chen, Yuxuan and Chen, Tan and Ye, Mao and Wang, Jingdong and Zhu, Siyu}, title = {OpenHumanVid: A Large-Scale High-Quality Dataset for Enhancing Human-Centric Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7752-7762} }
Track Any Anomalous Object:A Granular Video Anomaly Detection Pipeline-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Yuzhi and Li, Chenxin and Zhang, Haitao and Lin, Zixu and Lin, Yunlong and Liu, Hengyu and Li, Wuyang and Liu, Xinyu and Gao, Jiechao and Huang, Yue and Ding, Xinghao and Yuan, Yixuan}, title = {Track Any Anomalous Object:A Granular Video Anomaly Detection Pipeline}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8689-8699} }
Object-aware Sound Source Localization via Audio-Visual Scene Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Um_2025_CVPR, author = {Um, Sung Jin and Kim, Dongjin and Lee, Sangmin and Kim, Jung Uk}, title = {Object-aware Sound Source Localization via Audio-Visual Scene Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8342-8351} }
SerialGen: Personalized Image Generation by First Standardization Then Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Cong and Zou, Han and Yu, Ruiqi and Zhang, Yan and Zhan, Zhenpeng}, title = {SerialGen: Personalized Image Generation by First Standardization Then Personalization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2847-2856} }
Augmented Deep Contexts for Spatially Embedded Video Coding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bian_2025_CVPR, author = {Bian, Yifan and Tang, Chuanbo and Li, Li and Liu, Dong}, title = {Augmented Deep Contexts for Spatially Embedded Video Coding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2094-2104} }
Proximal Algorithm Unrolling: Flexible and Efficient Reconstruction Networks for Single-Pixel Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Ping and Wang, Lishun and Qu, Gang and Wang, Xiaodong and Zhang, Yulun and Yuan, Xin}, title = {Proximal Algorithm Unrolling: Flexible and Efficient Reconstruction Networks for Single-Pixel Imaging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {411-421} }
Image Quality Assessment: From Human to Machine Preference-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Chunyi and Tian, Yuan and Ling, Xiaoyue and Zhang, Zicheng and Duan, Haodong and Wu, Haoning and Jia, Ziheng and Liu, Xiaohong and Min, Xiongkuo and Lu, Guo and Lin, Weisi and Zhai, Guangtao}, title = {Image Quality Assessment: From Human to Machine Preference}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7570-7581} }
Context-Aware Multimodal Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Roth_2025_CVPR, author = {Roth, Karsten and Akata, Zeynep and Damen, Dima and Balazevic, Ivana and Henaff, Olivier J.}, title = {Context-Aware Multimodal Pretraining}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4267-4279} }
Task-driven Image Fusion with Learnable Fusion Loss-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2025_CVPR, author = {Bai, Haowen and Zhang, Jiangshe and Zhao, Zixiang and Wu, Yichen and Deng, Lilun and Cui, Yukun and Feng, Tao and Xu, Shuang}, title = {Task-driven Image Fusion with Learnable Fusion Loss}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7457-7468} }
LamRA: Large Multimodal Model as Your Advanced Retrieval Assistant-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yikun and Zhang, Yajie and Cai, Jiayin and Jiang, Xiaolong and Hu, Yao and Yao, Jiangchao and Wang, Yanfeng and Xie, Weidi}, title = {LamRA: Large Multimodal Model as Your Advanced Retrieval Assistant}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4015-4025} }
CoMM: A Coherent Interleaved Image-Text Dataset for Multimodal Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Wei and Li, Lin and Yang, Yongqi and Wen, Bin and Yang, Fan and Gao, Tingting and Wu, Yu and Chen, Long}, title = {CoMM: A Coherent Interleaved Image-Text Dataset for Multimodal Understanding and Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8073-8082} }
MoGe: Unlocking Accurate Monocular Geometry Estimation for Open-Domain Images with Optimal Training Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Ruicheng and Xu, Sicheng and Dai, Cassie and Xiang, Jianfeng and Deng, Yu and Tong, Xin and Yang, Jiaolong}, title = {MoGe: Unlocking Accurate Monocular Geometry Estimation for Open-Domain Images with Optimal Training Supervision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5261-5271} }
Beyond Background Shift: Rethinking Instance Replay in Continual Semantic Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yin_2025_CVPR, author = {Yin, Hongmei and Feng, Tingliang and Lyu, Fan and Shang, Fanhua and Liu, Hongying and Feng, Wei and Wan, Liang}, title = {Beyond Background Shift: Rethinking Instance Replay in Continual Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9839-9848} }
ScaleLSD: Scalable Deep Line Segment Detection Streamlined-
[pdf]
[supp]
[bibtex]@InProceedings{Ke_2025_CVPR, author = {Ke, Zeran and Tan, Bin and Zheng, Xianwei and Shen, Yujun and Wu, Tianfu and Xue, Nan}, title = {ScaleLSD: Scalable Deep Line Segment Detection Streamlined}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6327-6336} }
Revisiting MAE Pre-training for 3D Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wald_2025_CVPR, author = {Wald, Tassilo and Ulrich, Constantin and Lukyanenko, Stanislav and Goncharov, Andrei and Paderno, Alberto and Miller, Maximilian and Maerkisch, Leander and Jaeger, Paul and Maier-Hein, Klaus}, title = {Revisiting MAE Pre-training for 3D Medical Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5186-5196} }
ChatHuman: Chatting about 3D Humans with Tools-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Jing and Feng, Yao and Liu, Weiyang and Black, Michael J.}, title = {ChatHuman: Chatting about 3D Humans with Tools}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8150-8161} }
Scalable Autoregressive Monocular Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Jinhong and Liu, Jian and Tang, Dongqi and Wang, Weiqiang and Li, Wentong and Chen, Danny and Chen, Jintai and Wu, Jian}, title = {Scalable Autoregressive Monocular Depth Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6262-6272} }
Recurrence-Enhanced Vision-and-Language Transformers for Robust Multimodal Document Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Caffagni_2025_CVPR, author = {Caffagni, Davide and Sarto, Sara and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita}, title = {Recurrence-Enhanced Vision-and-Language Transformers for Robust Multimodal Document Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9286-9295} }
Camouflage Anything: Learning to Hide using Controlled Out-painting and Representation Engineering-
[pdf]
[supp]
[bibtex]@InProceedings{Das_2025_CVPR, author = {Das, Biplab and Gopalakrishnan, Viswanath}, title = {Camouflage Anything: Learning to Hide using Controlled Out-painting and Representation Engineering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3603-3613} }
Test-Time Fine-Tuning of Image Compression Models for Multi-Task Adaptability-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Unki and Jeong, Seongmoon and Jang, Youngchan and Park, Gyeong-Moon and Ko, Jong Hwan}, title = {Test-Time Fine-Tuning of Image Compression Models for Multi-Task Adaptability}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4430-4440} }
DynPose: Largely Improving the Efficiency of Human Pose Estimation by a Simple Dynamic Framework-
[pdf]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Yalong and Zhao, Lin and Gong, Chen and Li, Guangyu and Wang, Di and Wang, Nannan}, title = {DynPose: Largely Improving the Efficiency of Human Pose Estimation by a Simple Dynamic Framework}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1160-1169} }
VideoTree: Adaptive Tree-based Video Representation for LLM Reasoning on Long Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Ziyang and Yu, Shoubin and Stengel-Eskin, Elias and Yoon, Jaehong and Cheng, Feng and Bertasius, Gedas and Bansal, Mohit}, title = {VideoTree: Adaptive Tree-based Video Representation for LLM Reasoning on Long Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3272-3283} }
Distinguish Then Exploit: Source-free Open Set Domain Adaptation via Weight Barcode Estimation and Sparse Label Assignment-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Weiming and Dan, Jun and Wang, Fan and Liao, Xinting and Dong, Junhao and Yu, Hua and Dong, Shunjie and Qi, Lianyong}, title = {Distinguish Then Exploit: Source-free Open Set Domain Adaptation via Weight Barcode Estimation and Sparse Label Assignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4927-4938} }
Multi-Sensor Object Anomaly Detection: Unifying Appearance, Geometry, and Internal Properties-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Wenqiao and Zheng, Bozhong and Xu, Xiaohao and Gan, Jinye and Lu, Fading and Li, Xiang and Ni, Na and Tian, Zheng and Huang, Xiaonan and Gao, Shenghua and Wu, Yingna}, title = {Multi-Sensor Object Anomaly Detection: Unifying Appearance, Geometry, and Internal Properties}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9984-9993} }
Fancy123: One Image to High-Quality 3D Mesh Generation via Plug-and-Play Deformation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Qiao and Li, Xianzhi and Tang, Yuan and Han, Xu and Hu, Long and Hao, Yixue and Chen, Min}, title = {Fancy123: One Image to High-Quality 3D Mesh Generation via Plug-and-Play Deformation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {595-604} }
PlanarSplatting: Accurate Planar Surface Reconstruction in 3 Minutes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2025_CVPR, author = {Tan, Bin and Yu, Rui and Shen, Yujun and Xue, Nan}, title = {PlanarSplatting: Accurate Planar Surface Reconstruction in 3 Minutes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1190-1199} }
Omni-ID: Holistic Identity Representation Designed for Generative Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2025_CVPR, author = {Qian, Guocheng and Wang, Kuan-Chieh and Patashnik, Or and Heravi, Negin and Ostashev, Daniil and Tulyakov, Sergey and Cohen-Or, Daniel and Aberman, Kfir}, title = {Omni-ID: Holistic Identity Representation Designed for Generative Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8786-8795} }
MIRE: Matched Implicit Neural Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Jayasundara_2025_CVPR, author = {Jayasundara, Dhananjaya and Zhao, Heng and Labate, Demetrio and Patel, Vishal M.}, title = {MIRE: Matched Implicit Neural Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8279-8288} }
AeSPa : Attention-guided Self-supervised Parallel Imaging for MRI Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Joo_2025_CVPR, author = {Joo, Jinho and Kim, Hyeseong and Won, Hyeyeon and Lee, Deukhee and Eo, Taejoon and Hwang, Dosik}, title = {AeSPa : Attention-guided Self-supervised Parallel Imaging for MRI Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5217-5226} }
RobSense: A Robust Multi-modal Foundation Model for Remote Sensing with Static, Temporal, and Incomplete Data Adaptability-
[pdf]
[bibtex]@InProceedings{Do_2025_CVPR, author = {Do, Minh Kha and Han, Kang and Lai, Phu and Phan, Khoa T. and Xiang, Wei}, title = {RobSense: A Robust Multi-modal Foundation Model for Remote Sensing with Static, Temporal, and Incomplete Data Adaptability}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7427-7436} }
MAC-Ego3D: Multi-Agent Gaussian Consensus for Real-Time Collaborative Ego-Motion and Photorealistic 3D Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Xiaohao and Xue, Feng and Zhao, Shibo and Pan, Yike and Scherer, Sebastian and Huang, Xiaonan}, title = {MAC-Ego3D: Multi-Agent Gaussian Consensus for Real-Time Collaborative Ego-Motion and Photorealistic 3D Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {854-863} }
Online Video Understanding: OVBench and VideoChat-Online-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Zhenpeng and Li, Xinhao and Li, Jiaqi and Wang, Jing and Zeng, Xiangyu and Liang, Cheng and Wu, Tao and Chen, Xi and Li, Liang and Wang, Limin}, title = {Online Video Understanding: OVBench and VideoChat-Online}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3328-3338} }
LightLoc: Learning Outdoor LiDAR Localization at Light Speed-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Wen and Liu, Chen and Yu, Shangshu and Liu, Dunqiang and Zhou, Yin and Shen, Siqi and Wen, Chenglu and Wang, Cheng}, title = {LightLoc: Learning Outdoor LiDAR Localization at Light Speed}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6680-6689} }
Accurate Differential Operators for Hybrid Neural Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chetan_2025_CVPR, author = {Chetan, Aditya and Yang, Guandao and Wang, Zichen and Marschner, Steve and Hariharan, Bharath}, title = {Accurate Differential Operators for Hybrid Neural Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {530-539} }
FeedEdit: Text-Based Image Editing with Dynamic Feedback Regulation-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2025_CVPR, author = {Fu, Fengyi and Zhang, Lei and Huang, Mengqi and Mao, Zhendong}, title = {FeedEdit: Text-Based Image Editing with Dynamic Feedback Regulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2661-2670} }
Classifier-guided CLIP Distillation for Unsupervised Multi-label Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Dongseob and Shim, Hyunjung}, title = {Classifier-guided CLIP Distillation for Unsupervised Multi-label Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4661-4671} }
UMotion: Uncertainty-driven Human Motion Estimation from Inertial and Ultra-wideband Units-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Huakun and Ota, Hiroki and Wei, Xin and Hirao, Yutaro and Perusquia-Hernandez, Monica and Uchiyama, Hideaki and Kiyokawa, Kiyoshi}, title = {UMotion: Uncertainty-driven Human Motion Estimation from Inertial and Ultra-wideband Units}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7085-7094} }
Scene Map-based Prompt Tuning for Navigation Instruction Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2025_CVPR, author = {Fan, Sheng and Liu, Rui and Wang, Wenguan and Yang, Yi}, title = {Scene Map-based Prompt Tuning for Navigation Instruction Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6898-6908} }
DropoutGS: Dropping Out Gaussians for Better Sparse-view Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Yexing and Wang, Longguang and Chen, Minglin and Ao, Sheng and Li, Li and Guo, Yulan}, title = {DropoutGS: Dropping Out Gaussians for Better Sparse-view Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {701-710} }
Bridging Modalities: Improving Universal Multimodal Retrieval by Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xin and Zhang, Yanzhao and Xie, Wen and Li, Mingxin and Dai, Ziqi and Long, Dingkun and Xie, Pengjun and Zhang, Meishan and Li, Wenjie and Zhang, Min}, title = {Bridging Modalities: Improving Universal Multimodal Retrieval by Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9274-9285} }
Enhancing Dataset Distillation via Non-Critical Region Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2025_CVPR, author = {Tran, Minh-Tuan and Le, Trung and Le, Xuan-May and Do, Thanh-Toan and Phung, Dinh}, title = {Enhancing Dataset Distillation via Non-Critical Region Refinement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10015-10024} }
PUP 3D-GS: Principled Uncertainty Pruning for 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Hanson_2025_CVPR, author = {Hanson, Alex and Tu, Allen and Singla, Vasu and Jayawardhana, Mayuka and Zwicker, Matthias and Goldstein, Tom}, title = {PUP 3D-GS: Principled Uncertainty Pruning for 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5949-5958} }
ScribbleLight: Single Image Indoor Relighting with Scribbles-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2025_CVPR, author = {Choi, Jun Myeong and Wang, Annie and Peers, Pieter and Bhattad, Anand and Sengupta, Roni}, title = {ScribbleLight: Single Image Indoor Relighting with Scribbles}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5720-5731} }
InsightEdit: Towards Better Instruction Following for Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Yingjing and Kong, Jie and Wang, Jiazhi and Pan, Xiao and Lin, Bo and Liu, Qiang}, title = {InsightEdit: Towards Better Instruction Following for Image Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2694-2703} }
One-for-More: Continual Diffusion Model for Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xiaofan and Tan, Xin and Chen, Zhuo and Zhang, Zhizhong and Zhang, Ruixin and Guo, Rizen and Jiang, Guanna and Chen, Yulong and Qu, Yanyun and Ma, Lizhuang and Xie, Yuan}, title = {One-for-More: Continual Diffusion Model for Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4766-4775} }
Omni-RGPT: Unifying Image and Video Region-level Understanding via Token Marks-
[pdf]
[supp]
[bibtex]@InProceedings{Heo_2025_CVPR, author = {Heo, Miran and Chen, Min-Hung and Huang, De-An and Liu, Sifei and Radhakrishnan, Subhashree and Kim, Seon Joo and Wang, Yu-Chiang Frank and Hachiuma, Ryo}, title = {Omni-RGPT: Unifying Image and Video Region-level Understanding via Token Marks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3919-3930} }
EDM: Equirectangular Projection-Oriented Dense Kernelized Feature Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2025_CVPR, author = {Jung, Dongki and Choi, Jaehoon and Lee, Yonghan and Jeong, Somi and Lee, Taejae and Manocha, Dinesh and Yeon, Suyong}, title = {EDM: Equirectangular Projection-Oriented Dense Kernelized Feature Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6337-6347} }
EZSR: Event-based Zero-Shot Recognition-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Yan and Pan, Liyuan and Li, Dongxu and Liu, Liu}, title = {EZSR: Event-based Zero-Shot Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4628-4638} }
SVFR: A Unified Framework for Generalized Video Face Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zhiyao and Chen, Xu and Xu, Chengming and Zhu, Junwei and Hu, Xiaobin and Zhang, Jiangning and Wang, Chengjie and Liu, Yuqi and Zhou, Yiyi and Ji, Rongrong}, title = {SVFR: A Unified Framework for Generalized Video Face Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7406-7415} }
Decoupling Fine Detail and Global Geometry for Compressed Depth Map Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Huan and Han, Wencheng and Shen, Jianbing}, title = {Decoupling Fine Detail and Global Geometry for Compressed Depth Map Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {951-960} }
Digital Twin Catalog: A Large-Scale Photorealistic 3D Object Digital Twin Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2025_CVPR, author = {Dong, Zhao and Chen, Ka and Lv, Zhaoyang and Yu, Hong-Xing and Zhang, Yunzhi and Zhang, Cheng and Zhu, Yufeng and Tian, Stephen and Li, Zhengqin and Moffatt, Geordie and Christofferson, Sean and Fort, James and Pan, Xiaqing and Yan, Mingfei and Wu, Jiajun and Ren, Carl Yuheng and Newcombe, Richard}, title = {Digital Twin Catalog: A Large-Scale Photorealistic 3D Object Digital Twin Dataset}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {753-763} }
MeshGen: Generating PBR Textured Mesh with Render-Enhanced Auto-Encoder and Generative Data Augmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Zilong and Wang, Yikai and Sun, Wenqiang and Wang, Feng and Chen, Yiwen and Liu, Huaping}, title = {MeshGen: Generating PBR Textured Mesh with Render-Enhanced Auto-Encoder and Generative Data Augmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5835-5848} }
DeClotH: Decomposable 3D Cloth and Human Body Reconstruction from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2025_CVPR, author = {Nam, Hyeongjin and Kim, Donghwan and Oh, Jeongtaek and Lee, Kyoung Mu}, title = {DeClotH: Decomposable 3D Cloth and Human Body Reconstruction from a Single Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5636-5645} }
Neuron: Learning Context-Aware Evolving Representations for Zero-Shot Skeleton Action Recognition-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Yang and Guo, Jingcai and Guo, Song and Tao, Dacheng}, title = {Neuron: Learning Context-Aware Evolving Representations for Zero-Shot Skeleton Action Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8721-8730} }
High-Fidelity Relightable Monocular Portrait Animation with Lighting-Controllable Video Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Mingtao and Xing, Guanyu and Liu, Yanli}, title = {High-Fidelity Relightable Monocular Portrait Animation with Lighting-Controllable Video Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {228-238} }
Plug-and-Play PPO: An Adaptive Point Prompt Optimizer Making SAM Greater-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xueyu and Wang, Rui and Lai, Yexin and Shi, Guangze and Shao, Feixue and Hao, Fang and Zhang, Jianan and Shen, Jia and Wu, Yongfei and Zheng, Wen}, title = {Plug-and-Play PPO: An Adaptive Point Prompt Optimizer Making SAM Greater}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4332-4342} }
EchoONE: Segmenting Multiple Echocardiography Planes in One Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Jiongtong and Xue, Wufeng and Cheng, Jun and Liu, Yingying and Zhuo, Wei and Ni, Dong}, title = {EchoONE: Segmenting Multiple Echocardiography Planes in One Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5207-5216} }
EasyHOI: Unleashing the Power of Large Models for Reconstructing Hand-Object Interactions in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yumeng and Long, Xiaoxiao and Yang, Zemin and Liu, Yuan and Habermann, Marc and Theobalt, Christian and Ma, Yuexin and Wang, Wenping}, title = {EasyHOI: Unleashing the Power of Large Models for Reconstructing Hand-Object Interactions in the Wild}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7037-7047} }
Patch Matters: Training-free Fine-grained Image Caption Enhancement via Local Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2025_CVPR, author = {Peng, Ruotian and He, Haiying and Wei, Yake and Wen, Yandong and Hu, Di}, title = {Patch Matters: Training-free Fine-grained Image Caption Enhancement via Local Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3963-3973} }
PosterO: Structuring Layout Trees to Enable Language Models in Generalized Content-Aware Layout Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hsu_2025_CVPR, author = {Hsu, HsiaoYuan and Peng, Yuxin}, title = {PosterO: Structuring Layout Trees to Enable Language Models in Generalized Content-Aware Layout Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8117-8127} }
One2Any: One-Reference 6D Pose Estimation for Any Object-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Mengya and Li, Siyuan and Chhatkuli, Ajad and Truong, Prune and Van Gool, Luc and Tombari, Federico}, title = {One2Any: One-Reference 6D Pose Estimation for Any Object}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6457-6467} }
Contextual AD Narration with Interleaved Multimodal Sequence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Hanlin and Tong, Zhan and Zheng, Kecheng and Shen, Yujun and Wang, Limin}, title = {Contextual AD Narration with Interleaved Multimodal Sequence}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8372-8383} }
MNE-SLAM: Multi-Agent Neural SLAM for Mobile Robots-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2025_CVPR, author = {Deng, Tianchen and Shen, Guole and Xun, Chen and Yuan, Shenghai and Jin, Tongxin and Shen, Hongming and Wang, Yanbo and Wang, Jingchuan and Wang, Hesheng and Wang, Danwei and Chen, Weidong}, title = {MNE-SLAM: Multi-Agent Neural SLAM for Mobile Robots}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1485-1494} }
TensoFlow: Tensorial Flow-based Sampler for Inverse Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2025_CVPR, author = {Gu, Chun and Wei, Xiaofei and Zhang, Li and Zhu, Xiatian}, title = {TensoFlow: Tensorial Flow-based Sampler for Inverse Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {495-504} }
FRAMES-VQA: Benchmarking Fine-Tuning Robustness across Multi-Modal Shifts in Visual Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Chengyue and Maneechotesuwan, Brisa and Chopra, Shivang and Kira, Zsolt}, title = {FRAMES-VQA: Benchmarking Fine-Tuning Robustness across Multi-Modal Shifts in Visual Question Answering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3909-3918} }
LSceneLLM: Enhancing Large 3D Scene Understanding Using Adaptive Visual Preferences-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhi_2025_CVPR, author = {Zhi, Hongyan and Chen, Peihao and Li, Junyan and Ma, Shuailei and Sun, Xinyu and Xiang, Tianhang and Lei, Yinjie and Tan, Mingkui and Gan, Chuang}, title = {LSceneLLM: Enhancing Large 3D Scene Understanding Using Adaptive Visual Preferences}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3761-3771} }
Exploring Temporally-Aware Features for Point Tracking-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, In\`es Hyeonsu and Cho, Seokju and Huang, Jiahui and Yi, Jung and Lee, Joon-Young and Kim, Seungryong}, title = {Exploring Temporally-Aware Features for Point Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1962-1972} }
V^2Dial: Unification of Video and Visual Dialog via Multimodal Experts-
[pdf]
[supp]
[bibtex]@InProceedings{Abdessaied_2025_CVPR, author = {Abdessaied, Adnen and Rohrbach, Anna and Rohrbach, Marcus and Bulling, Andreas}, title = {V{\textasciicircum}2Dial: Unification of Video and Visual Dialog via Multimodal Experts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8637-8647} }
Detail-Preserving Latent Diffusion for Stable Shadow Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Jiamin and Zheng, Yuxin and Li, Zelong and Wang, Chi and Gu, Renshu and Xu, Weiwei and Xu, Gang}, title = {Detail-Preserving Latent Diffusion for Stable Shadow Removal}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7592-7602} }
CrossOver: 3D Scene Cross-Modal Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sarkar_2025_CVPR, author = {Sarkar, Sayan Deb and Miksik, Ondrej and Pollefeys, Marc and Barath, Daniel and Armeni, Iro}, title = {CrossOver: 3D Scene Cross-Modal Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8985-8994} }
Rethinking Temporal Fusion with a Unified Gradient Descent View for 3D Semantic Occupancy Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Dubing and Zheng, Huan and Fang, Jin and Dong, Xingping and Li, Xianfei and Liao, Wenlong and He, Tao and Peng, Pai and Shen, Jianbing}, title = {Rethinking Temporal Fusion with a Unified Gradient Descent View for 3D Semantic Occupancy Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1505-1515} }
Scalable Video-to-Dataset Generation for Cross-Platform Mobile Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2025_CVPR, author = {Jang, Yunseok and Song, Yeda and Sohn, Sungryull and Logeswaran, Lajanugen and Luo, Tiange and Kim, Dong-Ki and Bae, Kyunghoon and Lee, Honglak}, title = {Scalable Video-to-Dataset Generation for Cross-Platform Mobile Agents}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8604-8614} }
Tokenize Image Patches: Global Context Fusion for Effective Haze Removal in Large Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Jiuchen and Yan, Xinyu and Xu, Qizhi and Li, Kaiqi}, title = {Tokenize Image Patches: Global Context Fusion for Effective Haze Removal in Large Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2258-2268} }
ANNEXE: Unified Analyzing, Answering, and Pixel Grounding for Egocentric Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2025_CVPR, author = {Su, Yuejiao and Wang, Yi and Hu, Qiongyang and Yang, Chuang and Chau, Lap-Pui}, title = {ANNEXE: Unified Analyzing, Answering, and Pixel Grounding for Egocentric Interaction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9027-9038} }
MET3R: Measuring Multi-View Consistency in Generated Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Asim_2025_CVPR, author = {Asim, Mohammad and Wewer, Christopher and Wimmer, Thomas and Schiele, Bernt and Lenssen, Jan Eric}, title = {MET3R: Measuring Multi-View Consistency in Generated Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6034-6044} }
Segmenting Maxillofacial Structures in CBCT Volumes-
[pdf]
[bibtex]@InProceedings{Bolelli_2025_CVPR, author = {Bolelli, Federico and Marchesini, Kevin and van Nistelrooij, Niels and Lumetti, Luca and Pipoli, Vittorio and Ficarra, Elisa and Vinayahalingam, Shankeeth and Grana, Costantino}, title = {Segmenting Maxillofacial Structures in CBCT Volumes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5238-5248} }
3D Dental Model Segmentation with Geometrical Boundary Preserving-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xi_2025_CVPR, author = {Xi, Shufan and Liu, Zexian and Chang, Junlin and Wu, Hongyu and Wang, Xiaogang and Hao, Aimin}, title = {3D Dental Model Segmentation with Geometrical Boundary Preserving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10476-10485} }
VideoGigaGAN: Towards Detail-rich Video Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Yiran and Park, Taesung and Zhang, Richard and Zhou, Yang and Shechtman, Eli and Liu, Feng and Huang, Jia-Bin and Liu, Difan}, title = {VideoGigaGAN: Towards Detail-rich Video Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2139-2149} }
GLUS: Global-Local Reasoning Unified into A Single Large Language Model for Video Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Lang and Yu, Xueyang and Pang, Ziqi and Wang, Yu-Xiong}, title = {GLUS: Global-Local Reasoning Unified into A Single Large Language Model for Video Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8658-8667} }
Towards RAW Object Detection in Diverse Conditions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zhong-Yu and Jin, Xin and Sun, Bo-Yuan and Guo, Chun-Le and Cheng, Ming-Ming}, title = {Towards RAW Object Detection in Diverse Conditions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8859-8868} }
FLAME: Frozen Large Language Models Enable Data-Efficient Language-Image Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_CVPR, author = {Cao, Anjia and Wei, Xing and Ma, Zhiheng}, title = {FLAME: Frozen Large Language Models Enable Data-Efficient Language-Image Pre-training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4080-4090} }
Adapter Merging with Centroid Prototype Mapping for Scalable Class-Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fukuda_2025_CVPR, author = {Fukuda, Takuma and Kera, Hiroshi and Kawamoto, Kazuhiko}, title = {Adapter Merging with Centroid Prototype Mapping for Scalable Class-Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4884-4893} }
OpenSDI: Spotting Diffusion-Generated Images in the Open World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yabin and Huang, Zhiwu and Hong, Xiaopeng}, title = {OpenSDI: Spotting Diffusion-Generated Images in the Open World}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4291-4301} }
Lux Post Facto: Learning Portrait Performance Relighting with Conditional Video Diffusion and a Hybrid Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mei_2025_CVPR, author = {Mei, Yiqun and He, Mingming and Ma, Li and Philip, Julien and Xian, Wenqi and George, David M and Yu, Xueming and Dedic, Gabriel and Ta\c{s}el, Ahmet Levent and Yu, Ning and Patel, Vishal M. and Debevec, Paul}, title = {Lux Post Facto: Learning Portrait Performance Relighting with Conditional Video Diffusion and a Hybrid Dataset}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5510-5522} }
DiG: Scalable and Efficient Diffusion Models with Gated Linear Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Lianghui and Huang, Zilong and Liao, Bencheng and Liew, Jun Hao and Yan, Hanshu and Feng, Jiashi and Wang, Xinggang}, title = {DiG: Scalable and Efficient Diffusion Models with Gated Linear Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7664-7674} }
Monocular and Generalizable Gaussian Talking Head Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2025_CVPR, author = {Gong, Shengjie and Li, Haojie and Tang, Jiapeng and Hu, Dongming and Huang, Shuangping and Chen, Hao and Chen, Tianshui and Liu, Zhuoman}, title = {Monocular and Generalizable Gaussian Talking Head Animation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5523-5534} }
Locally Orderless Images for Optimization in Differentiable Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mehta_2025_CVPR, author = {Mehta, Ishit and Chandraker, Manmohan and Ramamoorthi, Ravi}, title = {Locally Orderless Images for Optimization in Differentiable Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5763-5772} }
Plug-and-Play Interpretable Responsible Text-to-Image Generation via Dual-Space Multi-facet Concept Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Azam_2025_CVPR, author = {Azam, Basim and Akhtar, Naveed}, title = {Plug-and-Play Interpretable Responsible Text-to-Image Generation via Dual-Space Multi-facet Concept Control}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2976-2985} }
Fine-Grained Erasure in Text-to-Image Diffusion-based Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thakral_2025_CVPR, author = {Thakral, Kartik and Glaser, Tamar and Hassner, Tal and Vatsa, Mayank and Singh, Richa}, title = {Fine-Grained Erasure in Text-to-Image Diffusion-based Foundation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9121-9130} }
DeRS: Towards Extremely Efficient Upcycled Mixture-of-Experts Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Yongqi and Ye, Peng and Huang, Chenyu and Cao, Jianjian and Zhang, Lin and Li, Baopu and Yu, Gang and Chen, Tao}, title = {DeRS: Towards Extremely Efficient Upcycled Mixture-of-Experts Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10056-10066} }
ALIEN: Implicit Neural Representations for Human Motion Prediction under Arbitrary Latency-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_CVPR, author = {Wei, Dong and Sun, Xiaoning and Gao, Xizhan and Hu, Shengxiang and Sun, Huaijiang}, title = {ALIEN: Implicit Neural Representations for Human Motion Prediction under Arbitrary Latency}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1861-1870} }
Sufficient Invariant Learning for Distribution Shift-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Taero and Park, Subeen and Lim, Sungjun and Jung, Yonghan and Muandet, Krikamol and Song, Kyungwoo}, title = {Sufficient Invariant Learning for Distribution Shift}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4958-4967} }
Domain Generalization in CLIP via Learning with Diverse Text Prompts-
[pdf]
[bibtex]@InProceedings{Wen_2025_CVPR, author = {Wen, Changsong and Peng, Zelin and Huang, Yu and Yang, Xiaokang and Shen, Wei}, title = {Domain Generalization in CLIP via Learning with Diverse Text Prompts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9559-9569} }
IterIS: Iterative Inference-Solving Alignment for LoRA Merging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Hongxu and Wang, Zhen and Li, Runshi and Zhu, Bowei and Chen, Long}, title = {IterIS: Iterative Inference-Solving Alignment for LoRA Merging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4829-4838} }
Efficient ANN-Guided Distillation: Aligning Rate-based Features of Spiking Neural Networks through Hybrid Block-wise Replacement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Shu and Yu, Chengting and Liu, Lei and Ma, Hanzhi and Wang, Aili and Li, Erping}, title = {Efficient ANN-Guided Distillation: Aligning Rate-based Features of Spiking Neural Networks through Hybrid Block-wise Replacement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10025-10035} }
PrEditor3D: Fast and Precise 3D Shape Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Erkoc_2025_CVPR, author = {Erko\c{c}, Ziya and G\"umeli, Can and Wang, Chaoyang and Nie{\ss}ner, Matthias and Dai, Angela and Wonka, Peter and Lee, Hsin-Ying and Zhuang, Peiye}, title = {PrEditor3D: Fast and Precise 3D Shape Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {640-649} }
ComRoPE: Scalable and Robust Rotary Position Embedding Parameterized by Trainable Commuting Angle Matrices-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Hao and Jiang, Tangyu and Jia, Shuning and Yan, Shannan and Liu, Shunning and Qian, Haolong and Li, Guanghao and Dong, Shuting and Yuan, Chun}, title = {ComRoPE: Scalable and Robust Rotary Position Embedding Parameterized by Trainable Commuting Angle Matrices}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4508-4517} }
LOCORE: Image Re-ranking with Long-Context Sequence Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2025_CVPR, author = {Xiao, Zilin and Suma, Pavel and Sachdeva, Ayush and Wang, Hao-Jen and Kordopatis-Zilos, Giorgos and Tolias, Giorgos and Ordonez, Vicente}, title = {LOCORE: Image Re-ranking with Long-Context Sequence Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9580-9590} }
LiVOS: Light Video Object Segmentation with Gated Linear Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Qin and Wang, Jianfeng and Yang, Zhengyuan and Li, Linjie and Lin, Kevin and Niethammer, Marc and Wang, Lijuan}, title = {LiVOS: Light Video Object Segmentation with Gated Linear Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8668-8678} }
Polarized Color Screen Matting-
[pdf]
[supp]
[bibtex]@InProceedings{Enomoto_2025_CVPR, author = {Enomoto, Kenji and Cohen, Scott and Price, Brian and Rhodes, TJ}, title = {Polarized Color Screen Matting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {391-399} }
GOAL: Global-local Object Alignment Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2025_CVPR, author = {Choi, Hyungyu and Jang, Young Kyun and Eom, Chanho}, title = {GOAL: Global-local Object Alignment Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4070-4079} }
Post-pre-training for Modality Alignment in Vision-Language Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yamaguchi_2025_CVPR, author = {Yamaguchi, Shin'ya and Feng, Dewei and Kanai, Sekitoshi and Adachi, Kazuki and Chijiwa, Daiki}, title = {Post-pre-training for Modality Alignment in Vision-Language Foundation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4256-4266} }
SynthLight: Portrait Relighting with Diffusion Model by Learning to Re-render Synthetic Faces-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chaturvedi_2025_CVPR, author = {Chaturvedi, Sumit and Ren, Mengwei and Hold-Geoffroy, Yannick and Liu, Jingyuan and Dorsey, Julie and Shu, Zhixin}, title = {SynthLight: Portrait Relighting with Diffusion Model by Learning to Re-render Synthetic Faces}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {369-379} }
Pseudo Visible Feature Fine-Grained Fusion for Thermal Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Ting and Ye, Mao and Wu, Tianwen and Li, Nianxin and Li, Shuaifeng and Tang, Song and Ji, Luping}, title = {Pseudo Visible Feature Fine-Grained Fusion for Thermal Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6710-6719} }
NVILA: Efficient Frontier Visual Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhijian and Zhu, Ligeng and Shi, Baifeng and Zhang, Zhuoyang and Lou, Yuming and Yang, Shang and Xi, Haocheng and Cao, Shiyi and Gu, Yuxian and Li, Dacheng and Li, Xiuyu and Tang, Haotian and Fang, Yunhao and Chen, Yukang and Hsieh, Cheng-Yu and Huang, De-An and Cheng, An-Chieh and Hu, Jinyi and Liu, Sifei and Krishna, Ranjay and Molchanov, Pavlo and Kautz, Jan and Yin, Hongxu and Han, Song and Lu, Yao}, title = {NVILA: Efficient Frontier Visual Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4122-4134} }
SemiETS: Integrating Spatial and Content Consistencies for Semi-Supervised End-to-end Text Spotting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2025_CVPR, author = {Luo, Dongliang and Zhu, Hanshen and Zhang, Ziyang and Liang, Dingkang and Xie, Xudong and Liu, Yuliang and Bai, Xiang}, title = {SemiETS: Integrating Spatial and Content Consistencies for Semi-Supervised End-to-end Text Spotting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9329-9338} }
NoPain: No-box Point Cloud Attack via Optimal Transport Singular Boundary-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Zezeng and Du, Xiaoyu and Lei, Na and Chen, Liming and Wang, Weimin}, title = {NoPain: No-box Point Cloud Attack via Optimal Transport Singular Boundary}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3492-3502} }
FADA: Fast Diffusion Avatar Synthesis with Mixed-Supervised Multi-CFG Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2025_CVPR, author = {Zhong, Tianyun and Liang, Chao and Jiang, Jianwen and Lin, Gaojie and Yang, Jiaqi and Zhao, Zhou}, title = {FADA: Fast Diffusion Avatar Synthesis with Mixed-Supervised Multi-CFG Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3101-3110} }
Geometry Field Splatting with Gaussian Surfels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Kaiwen and Sivaram, Venkataram and Peng, Cheng and Ramamoorthi, Ravi}, title = {Geometry Field Splatting with Gaussian Surfels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5752-5762} }
PS-EIP: Robust Photometric Stereo Based on Event Interval Profile-
[pdf]
[supp]
[bibtex]@InProceedings{Kitazawa_2025_CVPR, author = {Kitazawa, Kazuma and Aoto, Takahito and Ikehata, Satoshi and Takatani, Tsuyoshi}, title = {PS-EIP: Robust Photometric Stereo Based on Event Interval Profile}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6241-6251} }
GenPC: Zero-shot Point Cloud Completion via 3D Generative Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, An and Zhu, Zhe and Wei, Mingqiang}, title = {GenPC: Zero-shot Point Cloud Completion via 3D Generative Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1308-1318} }
Latent Drifting in Diffusion Models for Counterfactual Medical Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yeganeh_2025_CVPR, author = {Yeganeh, Yousef and Farshad, Azade and Charisiadis, Ioannis and Hasny, Marta and Hartenberger, Martin and Ommer, Bj\"orn and Navab, Nassir and Adeli, Ehsan}, title = {Latent Drifting in Diffusion Models for Counterfactual Medical Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7685-7695} }
Rethinking Spiking Self-Attention Mechanism: Implementing a-XNOR Similarity Calculation in Spiking Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2025_CVPR, author = {Xiao, Yichen and Wang, Shuai and Zhang, Dehao and Wei, Wenjie and Shan, Yimeng and Liu, Xiaoli and Jiang, Yulin and Zhang, Malu}, title = {Rethinking Spiking Self-Attention Mechanism: Implementing a-XNOR Similarity Calculation in Spiking Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5444-5454} }
HeMoRa: Unsupervised Heuristic Consensus Sampling for Robust Point Cloud Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Shaocheng and Wang, Yiming and Zhao, Kaiyan and Shi, Pengcheng and Zhao, Zhenjun and Zhang, Yongjun and Li, Jiayuan}, title = {HeMoRa: Unsupervised Heuristic Consensus Sampling for Robust Point Cloud Registration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1363-1373} }
Reducing Class-wise Confusion for Incremental Learning with Disentangled Manifolds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Huitong and Wang, Yu and Fan, Yan and Jiang, Guosong and Hu, Qinghua}, title = {Reducing Class-wise Confusion for Incremental Learning with Disentangled Manifolds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10121-10130} }
AniMo: Species-Aware Model for Text-Driven Animal Motion Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xuan and Ruan, Kai and Zhang, Xing and Wang, Gaoang}, title = {AniMo: Species-Aware Model for Text-Driven Animal Motion Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1929-1939} }
EditAR: Unified Conditional Generation with Autoregressive Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mu_2025_CVPR, author = {Mu, Jiteng and Vasconcelos, Nuno and Wang, Xiaolong}, title = {EditAR: Unified Conditional Generation with Autoregressive Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7899-7909} }
Instance-wise Supervision-level Optimization in Active Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Matsuo_2025_CVPR, author = {Matsuo, Shinnosuke and Togashi, Riku and Bise, Ryoma and Uchida, Seiichi and Nomura, Masahiro}, title = {Instance-wise Supervision-level Optimization in Active Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4939-4947} }
BHViT: Binarized Hybrid Vision Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR, author = {Gao, Tian and Zhang, Yu and Zhang, Zhiyuan and Liu, Huajun and Yin, Kaijie and Xu, Chengzhong and Kong, Hui}, title = {BHViT: Binarized Hybrid Vision Transformer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3563-3572} }
Pathways on the Image Manifold: Image Editing via Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rotstein_2025_CVPR, author = {Rotstein, Noam and Yona, Gal and Silver, Daniel and Velich, Roy and Bensaid, David and Kimmel, Ron}, title = {Pathways on the Image Manifold: Image Editing via Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7857-7866} }
DeSplat: Decomposed Gaussian Splatting for Distractor-Free Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yihao and Klasson, Marcus and Turkulainen, Matias and Wang, Shuzhe and Kannala, Juho and Solin, Arno}, title = {DeSplat: Decomposed Gaussian Splatting for Distractor-Free Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {722-732} }
Stable Flow: Vital Layers for Training-Free Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Avrahami_2025_CVPR, author = {Avrahami, Omri and Patashnik, Or and Fried, Ohad and Nemchinov, Egor and Aberman, Kfir and Lischinski, Dani and Cohen-Or, Daniel}, title = {Stable Flow: Vital Layers for Training-Free Image Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7877-7888} }
TokenMotion: Decoupled Motion Control via Token Disentanglement for Human-centric Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Ruineng and Xing, Daitao and Sun, Huiming and Ha, Yuanzhou and Shen, Jinglin and Ho, Chiuman}, title = {TokenMotion: Decoupled Motion Control via Token Disentanglement for Human-centric Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1951-1961} }
CholecTrack20: A Multi-Perspective Tracking Dataset for Surgical Tools-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nwoye_2025_CVPR, author = {Nwoye, Chinedu Innocent and Elgohary, Kareem and Srinivas, Anvita and Zaid, Fauzan and Lavanchy, Jo\"el L. and Padoy, Nicolas}, title = {CholecTrack20: A Multi-Perspective Tracking Dataset for Surgical Tools}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8942-8952} }
Conditional Balance: Improving Multi-Conditioning Trade-Offs in Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cohen_2025_CVPR, author = {Cohen, Nadav Z. and Nir, Oron and Shamir, Ariel}, title = {Conditional Balance: Improving Multi-Conditioning Trade-Offs in Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2641-2650} }
KeyFace: Expressive Audio-Driven Facial Animation for Long Sequences via KeyFrame Interpolation-
[pdf]
[supp]
[bibtex]@InProceedings{Bigata_2025_CVPR, author = {Bigata, Antoni and Stypu{\l}kowski, Micha{\l} and Mira, Rodrigo and Bounareli, Stella and Vougioukas, Konstantinos and Landgraf, Zoe and Drobyshev, Nikita and Zieba, Maciej and Petridis, Stavros and Pantic, Maja}, title = {KeyFace: Expressive Audio-Driven Facial Animation for Long Sequences via KeyFrame Interpolation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5477-5488} }
Context-Enhanced Memory-Refined Transformer for Online Action Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pang_2025_CVPR, author = {Pang, Zhanzhong and Sener, Fadime and Yao, Angela}, title = {Context-Enhanced Memory-Refined Transformer for Online Action Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8700-8710} }
Data-Free Group-Wise Fully Quantized Winograd Convolution via Learnable Scales-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2025_CVPR, author = {Pan, Shuokai and Tuzi, Gerti and Sreeram, Sudarshan and Gope, Dibakar}, title = {Data-Free Group-Wise Fully Quantized Winograd Convolution via Learnable Scales}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4091-4100} }
GEN3C: 3D-Informed World-Consistent Video Generation with Precise Camera Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Xuanchi and Shen, Tianchang and Huang, Jiahui and Ling, Huan and Lu, Yifan and Nimier-David, Merlin and M{\"u}ller, Thomas and Keller, Alexander and Fidler, Sanja and Gao, Jun},
  title     = {{GEN3C}: {3D}-Informed World-Consistent Video Generation with Precise Camera Control},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6121--6132},
}
A Dataset for Semantic Segmentation in the Presence of Unknowns-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Laskar_2025_CVPR,
  author    = {Laskar, Zakaria and Vojir, Tomas and Grcic, Matej and Melekhov, Iaroslav and Gangisetty, Shankar and Kannala, Juho and Matas, Jiri and Tolias, Giorgos and Jawahar, C. V.},
  title     = {A Dataset for Semantic Segmentation in the Presence of Unknowns},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1439--1448},
}
HierarQ: Task-Aware Hierarchical Q-Former for Enhanced Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Azad_2025_CVPR,
  author    = {Azad, Shehreen and Vineet, Vibhav and Rawat, Yogesh Singh},
  title     = {{HierarQ}: Task-Aware Hierarchical {Q-Former} for Enhanced Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8545--8556},
}
CASP: Compression of Large Multimodal Models Based on Attention Sparsity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gholami_2025_CVPR,
  author    = {Gholami, Mohsen and Akbari, Mohammad and Cannons, Kevin and Zhang, Yong},
  title     = {{CASP}: Compression of Large Multimodal Models Based on Attention Sparsity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9372--9381},
}
UNIC-Adapter: Unified Image-instruction Adapter with Multi-modal Transformer for Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2025_CVPR,
  author    = {Duan, Lunhao and Zhao, Shanshan and Yan, Wenjun and Li, Yinglun and Chen, Qing-Guo and Xu, Zhao and Luo, Weihua and Zhang, Kaifu and Gong, Mingming and Xia, Gui-Song},
  title     = {{UNIC-Adapter}: Unified Image-instruction Adapter with Multi-modal Transformer for Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7963--7973},
}
Towards Cost-Effective Learning: A Synergy of Semi-Supervised and Active Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Tianxiang and Liu, Ningzhong and Sun, Han},
  title     = {Towards Cost-Effective Learning: A Synergy of Semi-Supervised and Active Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10163--10172},
}
Generative Photography: Scene-Consistent Camera Control for Realistic Text-to-Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Yu and Wang, Xijun and Sheng, Yichen and Chennuri, Prateek and Zhang, Xingguang and Chan, Stanley},
  title     = {Generative Photography: Scene-Consistent Camera Control for Realistic Text-to-Image Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7920--7930},
}
Advancing Manga Analysis: Comprehensive Segmentation Annotations for the Manga109 Dataset-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Minshan and Lin, Jian and Liu, Hanyuan and Li, Chengze and Wong, Tien-Tsin},
  title     = {Advancing Manga Analysis: Comprehensive Segmentation Annotations for the {Manga109} Dataset},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8869--8878},
}
EnvGS: Modeling View-Dependent Appearance with Environment Gaussian-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Tao and Chen, Xi and Xu, Zhen and Xie, Yiman and Jin, Yudong and Shen, Yujun and Peng, Sida and Bao, Hujun and Zhou, Xiaowei},
  title     = {{EnvGS}: Modeling View-Dependent Appearance with Environment {Gaussian}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5742--5751},
}
Provoking Multi-modal Few-Shot LVLM via Exploration-Exploitation In-Context Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Cheng and Zhai, Yunpeng and Zhao, Yifan and Gao, Jinyang and Ding, Bolin and Li, Jia},
  title     = {Provoking Multi-modal Few-Shot {LVLM} via Exploration-Exploitation In-Context Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3826--3835},
}
MonoDGP: Monocular 3D Object Detection with Decoupled-Query and Geometry-Error Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pu_2025_CVPR,
  author    = {Pu, Fanqi and Wang, Yifan and Deng, Jiru and Yang, Wenming},
  title     = {{MonoDGP}: Monocular {3D} Object Detection with Decoupled-Query and Geometry-Error Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6520--6530},
}
Flexible Group Count Enables Hassle-Free Structured Pruning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jiamu and Zhong, Shaochen and Ye, Andrew and Liu, Zirui and Zhao, Sebastian and Zhou, Kaixiong and Li, Li and Choi, Soo-Hyun and Chen, Rui and Hu, Xia and Xu, Shuai and Chaudhary, Vipin},
  title     = {Flexible Group Count Enables Hassle-Free Structured Pruning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4807--4818},
}
EasyCraft: A Robust and Efficient Framework for Automatic Avatar Crafting-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Suzhen and Chen, Weijie and Zhang, Wei and Zhao, Minda and Li, Lincheng and Zhang, Rongsheng and Hu, Zhipeng and Yu, Xin},
  title     = {{EasyCraft}: A Robust and Efficient Framework for Automatic Avatar Crafting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5581--5591},
}
MeshArt: Generating Articulated Meshes with Structure-Guided Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Daoyi and Siddiqui, Yawar and Li, Lei and Dai, Angela},
  title     = {{MeshArt}: Generating Articulated Meshes with Structure-Guided Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {618--627},
}
Adaptive Non-Uniform Timestep Sampling for Accelerating Diffusion Model Training-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Myunsoo and Ki, Donghyeon and Shim, Seong-Woong and Lee, Byung-Jun},
  title     = {Adaptive Non-Uniform Timestep Sampling for Accelerating Diffusion Model Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2513--2522},
}
Explainable Saliency: Articulating Reasoning with Contextual Prioritization-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Nuo and Jiang, Ming and Zhao, Qi},
  title     = {Explainable Saliency: Articulating Reasoning with Contextual Prioritization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9601--9610},
}
Compass Control: Multi Object Orientation Control for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parihar_2025_CVPR,
  author    = {Parihar, Rishubh and Agrawal, Vaibhav and VS, Sachidanand and Radhakrishnan, Venkatesh Babu},
  title     = {Compass Control: Multi Object Orientation Control for Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2791--2801},
}
Unleashing the Potential of Consistency Learning for Detecting and Grounding Multi-Modal Media Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Yiheng and Yang, Yang and Tan, Zichang and Liu, Huan and Chen, Weihua and Zhou, Xu and Lei, Zhen},
  title     = {Unleashing the Potential of Consistency Learning for Detecting and Grounding Multi-Modal Media Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9242--9252},
}
VideoGEM: Training-free Action Grounding in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vogel_2025_CVPR,
  author    = {Vogel, Felix and Bousselham, Walid and Kukleva, Anna and Shvetsova, Nina and Kuehne, Hilde},
  title     = {{VideoGEM}: Training-free Action Grounding in Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3374--3383},
}
Structure-from-Motion with a Non-Parametric Camera Model-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yihan and Pan, Linfei and Pollefeys, Marc and Larsson, Viktor},
  title     = {Structure-from-Motion with a Non-Parametric Camera Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1040--1049},
}
LAL: Enhancing 3D Human Motion Prediction with Latency-aware Auxiliary Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Xiaoning and Wei, Dong and Sun, Huaijiang and Hu, Shengxiang},
  title     = {{LAL}: Enhancing {3D} Human Motion Prediction with Latency-aware Auxiliary Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7105--7114},
}
RefPose: Leveraging Reference Geometric Correspondences for Accurate 6D Pose Estimation of Unseen Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Jaeguk and Park, Jaewoo and Lee, Keuntek and Cho, Nam Ik},
  title     = {{RefPose}: Leveraging Reference Geometric Correspondences for Accurate {6D} Pose Estimation of Unseen Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6447--6456},
}
Relation3D : Enhancing Relation Modeling for Point Cloud Instance Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Jiahao and Deng, Jiacheng},
  title     = {{Relation3D} : Enhancing Relation Modeling for Point Cloud Instance Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8889--8899},
}
Shape My Moves: Text-Driven Shape-Aware Synthesis of Human Motions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2025_CVPR,
  author    = {Liao, Ting-Hsuan and Zhou, Yi and Shen, Yu and Huang, Chun-Hao Paul and Mitra, Saayan and Huang, Jia-Bin and Bhattacharya, Uttaran},
  title     = {Shape My Moves: Text-Driven Shape-Aware Synthesis of Human Motions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1917--1928},
}
Beyond Human Perception: Understanding Multi-Object World from Monocular View-
[pdf]
[bibtex]@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Keyu and Huang, Yongle and Sun, Shijie and Song, Xiangyu and Feng, Mingtao and Liu, Zedong and Song, Huansheng and Wang, Tiantian and Li, Jianxin and Akhtar, Naveed and Mian, Ajmal Saeed},
  title     = {Beyond Human Perception: Understanding Multi-Object World from Monocular View},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3751--3760},
}
LIRM: Large Inverse Rendering Model for Progressive Reconstruction of Shape, Materials and View-dependent Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Zhengqin and Wang, Dilin and Chen, Ka and Lv, Zhaoyang and Nguyen-Phuoc, Thu and Lee, Milim and Huang, Jia-Bin and Xiao, Lei and Zhu, Yufeng and Marshall, Carl S. and Ren, Yuheng and Newcombe, Richard and Dong, Zhao},
  title     = {{LIRM}: Large Inverse Rendering Model for Progressive Reconstruction of Shape, Materials and View-dependent Radiance Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {505--517},
}
Latent Space Super-Resolution for Higher-Resolution Image Generation with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2025_CVPR,
  author    = {Jeong, Jinho and Han, Sangmin and Kim, Jinwoo and Kim, Seon Joo},
  title     = {Latent Space Super-Resolution for Higher-Resolution Image Generation with Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2355--2365},
}
Rethinking Noisy Video-Text Retrieval via Relation-aware Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Huakai and Xiong, Guoxin and Mai, Huayu and Liu, Xiang and Zhang, Tianzhu},
  title     = {Rethinking Noisy Video-Text Retrieval via Relation-aware Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9231--9241},
}
Scaling Vision Pre-Training to 4K Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Baifeng and Li, Boyi and Cai, Han and Lu, Yao and Liu, Sifei and Pavone, Marco and Kautz, Jan and Han, Song and Darrell, Trevor and Molchanov, Pavlo and Yin, Hongxu},
  title     = {Scaling Vision Pre-Training to {4K} Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9631--9640},
}
GarmentPile: Point-Level Visual Affordance Guided Retrieval and Adaptation for Cluttered Garments Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Ruihai and Zhu, Ziyu and Wang, Yuran and Chen, Yue and Wang, Jiarui and Dong, Hao},
  title     = {{GarmentPile}: Point-Level Visual Affordance Guided Retrieval and Adaptation for Cluttered Garments Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6950--6959},
}
Improving Editability in Image Generation with Layer-wise Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Daneul and Lee, Jaeah and Park, Jaesik},
  title     = {Improving Editability in Image Generation with Layer-wise Memory},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7889--7898},
}
Simplification Is All You Need against Out-of-Distribution Overconfidence-
[pdf]
[bibtex]@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Keke and Hou, Chao and Peng, Weilong and Fang, Xiang and Wu, Zhize and Nie, Yongwei and Wang, Wenping and Tian, Zhihong},
  title     = {Simplification Is All You Need against Out-of-Distribution Overconfidence},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5030--5040},
}
SpatialDreamer: Self-supervised Stereo Video Synthesis from Monocular Input-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2025_CVPR,
  author    = {Lv, Zhen and Long, Yangqi and Huang, Congzhentao and Li, Cao and Lv, Chengfei and Ren, Hao and Zheng, Dian},
  title     = {{SpatialDreamer}: Self-supervised Stereo Video Synthesis from Monocular Input},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {811--821},
}
LOD-GS: Achieving Levels of Detail using Scalable Gaussian Soup-
[pdf]
[bibtex]@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Jianxiong and Qian, Yue and Zhan, Xiaohang},
  title     = {{LOD-GS}: Achieving Levels of Detail using Scalable {Gaussian} Soup},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {671--680},
}
The Devil is in Low-Level Features for Cross-Domain Few-Shot Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yuhan and Zou, Yixiong and Li, Yuhua and Li, Ruixuan},
  title     = {The Devil is in Low-Level Features for Cross-Domain Few-Shot Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4618--4627},
}
Discrete to Continuous: Generating Smooth Transition Poses from Sign Language Observations-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Shengeng and He, Jiayi and Cheng, Lechao and Wu, Jingjing and Guo, Dan and Hong, Richang},
  title     = {Discrete to Continuous: Generating Smooth Transition Poses from Sign Language Observations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3481--3491},
}
Unified Medical Lesion Segmentation via Self-referring Indicator-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2025_CVPR,
  author    = {Chang, Shijie and Zhao, Xiaoqi and Zhang, Lihe and Wang, Tiancheng},
  title     = {Unified Medical Lesion Segmentation via Self-referring Indicator},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10414--10424},
}
Unveiling Differences in Generative Models: A Scalable Differential Clustering Approach-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jingwei and Jalali, Mohammad and Li, Cheuk Ting and Farnia, Farzan},
  title     = {Unveiling Differences in Generative Models: A Scalable Differential Clustering Approach},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8269--8278},
}
Semantic Library Adaptation: LoRA Retrieval and Fusion for Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qorbani_2025_CVPR,
  author    = {Qorbani, Reza and Villani, Gianluca and Panagiotakopoulos, Theodoros and Colomer, Marc Botet and H{\"a}renstam-Nielsen, Linus and Segu, Mattia and Dovesi, Pier Luigi and Karlgren, Jussi and Cremers, Daniel and Tombari, Federico and Poggi, Matteo},
  title     = {Semantic Library Adaptation: {LoRA} Retrieval and Fusion for Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9804--9815},
}
PhyS-EdiT: Physics-aware Semantic Image Editing with Text Description-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Ziqi and Weng, Shuchen and Xia, Yifei and Shi, Boxin},
  title     = {{PhyS-EdiT}: Physics-aware Semantic Image Editing with Text Description},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7867--7876},
}
SceneDiffuser++: City-Scale Traffic Simulation via a Generative World Model-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2025_CVPR,
  author    = {Tan, Shuhan and Lambert, John and Jeon, Hong and Kulshrestha, Sakshum and Bai, Yijing and Luo, Jing and Anguelov, Dragomir and Tan, Mingxing and Jiang, Chiyu Max},
  title     = {{SceneDiffuser++}: City-Scale Traffic Simulation via a Generative World Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1570--1580},
}
Gaussian Splatting Feature Fields for (Privacy-Preserving) Visual Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Pietrantoni_2025_CVPR,
  author    = {Pietrantoni, Maxime and Csurka, Gabriela and Sattler, Torsten},
  title     = {Gaussian Splatting Feature Fields for (Privacy-Preserving) Visual Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1082--1092},
}
Gazing Into Missteps: Leveraging Eye-Gaze for Unsupervised Mistake Detection in Egocentric Videos of Skilled Human Activities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mazzamuto_2025_CVPR,
  author    = {Mazzamuto, Michele and Furnari, Antonino and Sato, Yoichi and Farinella, Giovanni Maria},
  title     = {Gazing Into Missteps: Leveraging Eye-Gaze for Unsupervised Mistake Detection in Egocentric Videos of Skilled Human Activities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8310--8320},
}
Recover and Match: Open-Vocabulary Multi-Label Recognition through Knowledge-Constrained Optimal Transport-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2025_CVPR,
  author    = {Tan, Hao and Tan, Zichang and Li, Jun and Liu, Ajian and Wan, Jun and Lei, Zhen},
  title     = {Recover and Match: Open-Vocabulary Multi-Label Recognition through Knowledge-Constrained Optimal Transport},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4650--4660},
}
DyFo: A Training-Free Dynamic Focus Visual Search for Enhancing LMMs in Fine-Grained Visual Understanding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Geng and Xu, Jinglin and Zhao, Yunzhen and Peng, Yuxin},
  title     = {{DyFo}: A Training-Free Dynamic Focus Visual Search for Enhancing {LMMs} in Fine-Grained Visual Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9098--9108},
}
From Head to Tail: Towards Balanced Representation in Large Vision-Language Models through Adaptive Data Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2025_CVPR,
  author    = {Song, Mingyang and Qu, Xiaoye and Zhou, Jiawei and Cheng, Yu},
  title     = {From Head to Tail: Towards Balanced Representation in Large Vision-Language Models through Adaptive Data Calibration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9434--9444},
}
HERA: Hybrid Explicit Representation for Ultra-Realistic Head Avatars-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Hongrui and Xiao, Yuting and Wang, Xuan and Li, Jiafei and Guo, Yudong and Fan, Yanbo and Gao, Shenghua and Zhang, Juyong},
  title     = {{HERA}: Hybrid Explicit Representation for Ultra-Realistic Head Avatars},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {260--270},
}
CoE: Chain-of-Explanation via Automatic Visual Concept Circuit Description and Polysemanticity Quantification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Wenlong and Wang, Qilong and Liu, Chuang and Li, Dong and Hu, Qinghua},
  title     = {{CoE}: Chain-of-Explanation via Automatic Visual Concept Circuit Description and Polysemanticity Quantification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4364--4374},
}
Hierarchical Adaptive Filtering Network for Text Image Specular Highlight Removal-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Zhi and Hu, Jingbo and Zhang, Ling and Fu, Gang and Xiao, Chunxia},
  title     = {Hierarchical Adaptive Filtering Network for Text Image Specular Highlight Removal},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2408--2417},
}
Learning Extremely High Density Crowds as Active Matters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_CVPR,
  author    = {He, Feixiang and Yue, Jiangbei and Zhu, Jialin and Seyfried, Armin and Casas, Dan and Pettr{\'e}, Julien and Wang, He},
  title     = {Learning Extremely High Density Crowds as Active Matters},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {540--550},
}
EchoMimicV2: Towards Striking, Simplified, and Semi-Body Human Animation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Meng_2025_CVPR,
  author    = {Meng, Rang and Zhang, Xingyu and Li, Yuming and Ma, Chenguang},
  title     = {{EchoMimicV2}: Towards Striking, Simplified, and Semi-Body Human Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5489--5498},
}
Gaussian Splatting for Efficient Satellite Image Photogrammetry-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Aira_2025_CVPR,
  author    = {Aira, Luca Savant and Facciolo, Gabriele and Ehret, Thibaud},
  title     = {Gaussian Splatting for Efficient Satellite Image Photogrammetry},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5959--5969},
}
Towards Improved Text-Aligned Codebook Learning: Multi-Hierarchical Codebook-Text Alignment with Long Text-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Guotao and Zhang, Baoquan and Wen, Zhiyuan and Zhao, Junteng and Ye, Yunming and Ye, Kola and He, Yao},
  title     = {Towards Improved Text-Aligned Codebook Learning: Multi-Hierarchical Codebook-Text Alignment with Long Text},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4060--4069},
}
Parallel Sequence Modeling via Generalized Spatial Propagation Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Hongjun and Byeon, Wonmin and Xu, Jiarui and Gu, Jinwei and Cheung, Ka Chun and Wang, Xiaolong and Han, Kai and Kautz, Jan and Liu, Sifei},
  title     = {Parallel Sequence Modeling via Generalized Spatial Propagation Network},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4473--4483},
}
NADER: Neural Architecture Design via Multi-Agent Collaboration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Zekang and Zeng, Wang and Jin, Sheng and Qian, Chen and Luo, Ping and Liu, Wentao},
  title     = {{NADER}: Neural Architecture Design via Multi-Agent Collaboration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4452--4461},
}
Fortifying Federated Learning Towards Trustworthiness via Auditable Data Valuation and Verifiable Client Contribution-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2025_CVPR,
  author    = {Kumar, K Naveen and Jha, Ranjeet Ranjan and Mohan, C Krishna and Tallamraju, Ravindra Babu},
  title     = {Fortifying Federated Learning Towards Trustworthiness via Auditable Data Valuation and Verifiable Client Contribution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4999--5009},
}
UniPre3D: Unified Pre-training of 3D Point Cloud Models with Cross-Modal Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Ziyi and Zhang, Yanran and Zhou, Jie and Lu, Jiwen},
  title     = {{UniPre3D}: Unified Pre-training of {3D} Point Cloud Models with Cross-Modal {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1319--1329},
}
Charm: The Missing Piece in ViT Fine-Tuning for Image Aesthetic Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Behrad_2025_CVPR,
  author    = {Behrad, Fatemeh and Tuytelaars, Tinne and Wagemans, Johan},
  title     = {Charm: The Missing Piece in {ViT} Fine-Tuning for Image Aesthetic Assessment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7815--7824},
}
SeeGround: See and Ground for Zero-Shot Open-Vocabulary 3D Visual Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Rong and Li, Shijie and Kong, Lingdong and Yang, Xulei and Liang, Junwei},
  title     = {{SeeGround}: See and Ground for Zero-Shot Open-Vocabulary {3D} Visual Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3707--3717},
}
Linear Attention Modeling for Learned Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Donghui and Cheng, Zhengxue and Wang, Shen and Wu, Ronghua and Hu, Hongwei and Lu, Guo and Song, Li},
  title     = {Linear Attention Modeling for Learned Image Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7623--7632},
}
Asynchronous Collaborative Graph Representation for Frames and Events-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR,
  author    = {Li, Dianze and Li, Jianing and Liu, Xu and Fan, Xiaopeng and Tian, Yonghong},
  title     = {Asynchronous Collaborative Graph Representation for Frames and Events},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1655--1666},
}
ReconDreamer: Crafting World Models for Driving Scene Reconstruction via Online Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2025_CVPR,
  author    = {Ni, Chaojun and Zhao, Guosheng and Wang, Xiaofeng and Zhu, Zheng and Qin, Wenkang and Huang, Guan and Liu, Chen and Chen, Yuyin and Wang, Yida and Zhang, Xueyang and Zhan, Yifei and Zhan, Kun and Jia, Peng and Lang, Xianpeng and Wang, Xingang and Mei, Wenjun},
  title     = {{ReconDreamer}: Crafting World Models for Driving Scene Reconstruction via Online Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1559--1569},
}
GenFusion: Closing the Loop between Reconstruction and Generation via Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Sibo and Xu, Congrong and Huang, Binbin and Geiger, Andreas and Chen, Anpei},
  title     = {{GenFusion}: Closing the Loop between Reconstruction and Generation via Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6078--6088},
}
The PanAf-FGBG Dataset: Understanding the Impact of Backgrounds in Wildlife Behaviour Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Brookes_2025_CVPR,
  author    = {Brookes, Otto and Kukushkin, Maksim and Mirmehdi, Majid and Stephens, Colleen and Dieguez, Paula and Hicks, Thurston C. and Jones, Sorrel and Lee, Kevin and McCarthy, Maureen S. and Meier, Amelia and Normand, Emmanuelle and Wessling, Erin G. and Wittig, Roman M. and Langergraber, Kevin and Zuberb{\"u}hler, Klaus and Boesch, Lukas and Schmid, Thomas and Arandjelovic, Mimi and K{\"u}hl, Hjalmar and Burghardt, Tilo},
  title     = {The {PanAf-FGBG} Dataset: Understanding the Impact of Backgrounds in Wildlife Behaviour Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5433--5443},
}
Sonic: Shifting Focus to Global Audio Perception in Portrait Animation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ji_2025_CVPR,
  author    = {Ji, Xiaozhong and Hu, Xiaobin and Xu, Zhihong and Zhu, Junwei and Lin, Chuming and He, Qingdong and Zhang, Jiangning and Luo, Donghao and Chen, Yi and Lin, Qin and Lu, Qinglin and Wang, Chengjie},
  title     = {Sonic: Shifting Focus to Global Audio Perception in Portrait Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {193--203},
}
Multitwine: Multi-Object Compositing with Text and Layout Control-
[pdf]
[supp]
[bibtex]@InProceedings{Tarres_2025_CVPR,
  author    = {Tarr{\'e}s, Gemma Canet and Lin, Zhe and Zhang, Zhifei and Zhang, He and Gilbert, Andrew and Collomosse, John and Kim, Soo Ye},
  title     = {Multitwine: Multi-Object Compositing with Text and Layout Control},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8094--8104},
}
Video Depth without Video Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ke_2025_CVPR, author = {Ke, Bingxin and Narnhofer, Dominik and Huang, Shengyu and Ke, Lei and Peters, Torben and Fragkiadaki, Katerina and Obukhov, Anton and Schindler, Konrad}, title = {Video Depth without Video Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7233-7243} }
PointLoRA: Low-Rank Adaptation with Token Selection for Point Cloud Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Song and Liu, Xiaolu and Kong, Lingdong and Xu, Jianyun and Hu, Chunyong and Fang, Gongfan and Li, Wentong and Zhu, Jianke and Wang, Xinchao}, title = {PointLoRA: Low-Rank Adaptation with Token Selection for Point Cloud Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6605-6615} }
HumanRig: Learning Automatic Rigging for Humanoid Character in a Large Scale Dataset-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chu_2025_CVPR, author = {Chu, Zedong and Xiong, Feng and Liu, Meiduo and Zhang, Jinzhi and Shao, Mingqi and Sun, Zhaoxu and Wang, Di and Xu, Mu}, title = {HumanRig: Learning Automatic Rigging for Humanoid Character in a Large Scale Dataset}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {304-313} }
GaussHDR: High Dynamic Range Gaussian Splatting via Learning Unified 3D and 2D Local Tone Mapping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Jinfeng and Kong, Lingtong and Li, Bo and Xu, Dan}, title = {GaussHDR: High Dynamic Range Gaussian Splatting via Learning Unified 3D and 2D Local Tone Mapping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5991-6000} }
Channel-wise Noise Scheduled Diffusion for Inverse Rendering in Indoor Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2025_CVPR, author = {Choi, JunYong and Sagong, Min-cheol and Lee, SeokYeong and Jung, Seung-Won and Kim, Ig-Jae and Cho, Junghyun}, title = {Channel-wise Noise Scheduled Diffusion for Inverse Rendering in Indoor Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5773-5782} }
Targeted Forgetting of Image Subgroups in CLIP Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zeliang and Liu, Gaowen and Fleming, Charles and Kompella, Ramana Rao and Xu, Chenliang}, title = {Targeted Forgetting of Image Subgroups in CLIP Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9870-9880} }
SeqAfford: Sequential 3D Affordance Reasoning via Multimodal Large Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Chunlin and Wang, Hanqing and Shi, Ye and Luo, Haoyang and Yang, Sibei and Yu, Jingyi and Wang, Jingya}, title = {SeqAfford: Sequential 3D Affordance Reasoning via Multimodal Large Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1691-1701} }
DSV-LFS: Unifying LLM-Driven Semantic Cues with Visual Features for Robust Few-Shot Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Karimi_2025_CVPR, author = {Karimi, Amin and Poullis, Charalambos}, title = {DSV-LFS: Unifying LLM-Driven Semantic Cues with Visual Features for Robust Few-Shot Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4584-4594} }
SemAlign3D: Semantic Correspondence between RGB-Images through Aligning 3D Object-Class Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wandel_2025_CVPR, author = {Wandel, Krispin and Wang, Hesheng}, title = {SemAlign3D: Semantic Correspondence between RGB-Images through Aligning 3D Object-Class Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1138-1147} }
DPU: Dynamic Prototype Updating for Multimodal Out-of-Distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Shawn and Gong, Huixian and Dong, Hao and Yang, Tiankai and Tu, Zhengzhong and Zhao, Yue}, title = {DPU: Dynamic Prototype Updating for Multimodal Out-of-Distribution Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10193-10202} }
Towards Unbiased and Robust Spatio-Temporal Scene Graph Generation and Anticipation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peddi_2025_CVPR, author = {Peddi, Rohith and Saurabh, Saurabh and Shrivastava, Ayush Abhay and Singla, Parag and Gogate, Vibhav}, title = {Towards Unbiased and Robust Spatio-Temporal Scene Graph Generation and Anticipation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8648-8657} }
Boosting Domain Incremental Learning: Selecting the Optimal Parameters is All You Need-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Qiang and Song, Xiang and He, Yuhang and Han, Jizhou and Ding, Chenhao and Gao, Xinyuan and Gong, Yihong}, title = {Boosting Domain Incremental Learning: Selecting the Optimal Parameters is All You Need}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4839-4849} }
Perceptual Inductive Bias Is What You Need Before Contrastive Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Junru and Li, Tianqin and Jiang, Dunhan and Wu, Shenghao and Ramirez, Alan and Lee, Tai Sing}, title = {Perceptual Inductive Bias Is What You Need Before Contrastive Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9621-9630} }
FaceBench: A Multi-View Multi-Level Facial Attribute VQA Dataset for Benchmarking Face Perception MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Xiaoqin and Ma, Xusen and Hou, Xianxu and Ding, Meidan and Li, Yudong and Chen, Junliang and Chen, Wenting and Peng, Xiaoyang and Shen, Linlin}, title = {FaceBench: A Multi-View Multi-Level Facial Attribute VQA Dataset for Benchmarking Face Perception MLLMs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9154-9164} }
EffiDec3D: An Optimized Decoder for High-Performance and Efficient 3D Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Rahman_2025_CVPR, author = {Rahman, Md Mostafijur and Marculescu, Radu}, title = {EffiDec3D: An Optimized Decoder for High-Performance and Efficient 3D Medical Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10435-10444} }
Exploring Historical Information for RGBE Visual Tracking with Mamba-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Chuanyu and Zhang, Jiqing and Wang, Yang and Ge, Huilin and Xia, Qianchen and Yin, Baocai and Yang, Xin}, title = {Exploring Historical Information for RGBE Visual Tracking with Mamba}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6500-6509} }
ArtiScene: Language-Driven Artistic 3D Scene Generation Through Image Intermediary-
[pdf]
[supp]
[bibtex]@InProceedings{Gu_2025_CVPR, author = {Gu, Zeqi and Cui, Yin and Li, Zhaoshuo and Wei, Fangyin and Ge, Yunhao and Gu, Jinwei and Liu, Ming-Yu and Davis, Abe and Ding, Yifan}, title = {ArtiScene: Language-Driven Artistic 3D Scene Generation Through Image Intermediary}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2891-2901} }
Improving Sound Source Localization with Joint Slot Attention on Image and Audio-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2025_CVPR, author = {Kim, Inho and Song, Youngkil and Park, Jicheol and Kim, Won Hwa and Kwak, Suha}, title = {Improving Sound Source Localization with Joint Slot Attention on Image and Audio}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3121-3130} }
Feature-Preserving Mesh Decimation for Normal Integration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Heep_2025_CVPR, author = {Heep, Moritz and Behnke, Sven and Zell, Eduard}, title = {Feature-Preserving Mesh Decimation for Normal Integration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5783-5792} }
DexHandDiff: Interaction-aware Diffusion Planning for Adaptive Dexterous Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Zhixuan and Mu, Yao and Wang, Yixiao and Chen, Tianxing and Shao, Wenqi and Zhan, Wei and Tomizuka, Masayoshi and Luo, Ping and Ding, Mingyu}, title = {DexHandDiff: Interaction-aware Diffusion Planning for Adaptive Dexterous Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1745-1755} }
VerbDiff: Text-Only Diffusion Models with Enhanced Interaction Awareness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cha_2025_CVPR, author = {Cha, SeungJu and Lee, Kwanyoung and Kim, Ye-Chan and Oh, Hyunwoo and Kim, Dong-Jin}, title = {VerbDiff: Text-Only Diffusion Models with Enhanced Interaction Awareness}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8041-8050} }
Memories of Forgotten Concepts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rusanovsky_2025_CVPR, author = {Rusanovsky, Matan and Malnick, Shimon and Jevnisek, Amir and Fried, Ohad and Avidan, Shai}, title = {Memories of Forgotten Concepts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2966-2975} }
PSBD: Prediction Shift Uncertainty Unlocks Backdoor Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Wei and Chen, Pin-Yu and Liu, Sijia and Wang, Ren}, title = {PSBD: Prediction Shift Uncertainty Unlocks Backdoor Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10255-10264} }
Phoenix: A Motion-based Self-Reflection Framework for Fine-grained Robotic Action Correction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2025_CVPR, author = {Xia, Wenke and Feng, Ruoxuan and Wang, Dong and Hu, Di}, title = {Phoenix: A Motion-based Self-Reflection Framework for Fine-grained Robotic Action Correction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6981-6990} }
Multirate Neural Image Compression with Adaptive Lattice Vector Quantization-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2025_CVPR, author = {Xu, Hao and Wu, Xiaolin and Zhang, Xi}, title = {Multirate Neural Image Compression with Adaptive Lattice Vector Quantization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7633-7642} }
EventFly: Event Camera Perception from Ground to the Sky-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2025_CVPR, author = {Kong, Lingdong and Lu, Dongyue and Xu, Xiang and Ng, Lai Xing and Ooi, Wei Tsang and Cottereau, Benoit R.}, title = {EventFly: Event Camera Perception from Ground to the Sky}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1472-1484} }
CH3Depth: Efficient and Flexible Depth Foundation Model with Flow Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jiaqi and Wang, Yiran and Zheng, Jinghong and Zhang, Junrui and Shen, Liao and Liu, Tianqi and Cao, Zhiguo}, title = {CH3Depth: Efficient and Flexible Depth Foundation Model with Flow Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7222-7232} }
Pow3R: Empowering Unconstrained 3D Reconstruction with Camera and Scene Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2025_CVPR, author = {Jang, Wonbong and Weinzaepfel, Philippe and Leroy, Vincent and Agapito, Lourdes and Revaud, Jerome}, title = {Pow3R: Empowering Unconstrained 3D Reconstruction with Camera and Scene Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1071-1081} }
MambaVLT: Time-Evolving Multimodal State Space Model for Vision-Language Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xinqi and Zhou, Li and Zhou, Zikun and Chen, Jianqiu and He, Zhenyu}, title = {MambaVLT: Time-Evolving Multimodal State Space Model for Vision-Language Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8731-8741} }
Adaptive Parameter Selection for Tuning Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yi and Deng, Yi-Xuan and Guo, Meng-Hao and Hu, Shi-Min}, title = {Adaptive Parameter Selection for Tuning Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4280-4290} }
Learning-enabled Polynomial Lyapunov Function Synthesis via High-Accuracy Counterexample-Guided Framework-
[pdf]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Hanrui and Qi, Niuniu and Ren, Mengxin and Liu, Banglong and Shi, Shuming and Yang, Zhengfeng}, title = {Learning-enabled Polynomial Lyapunov Function Synthesis via High-Accuracy Counterexample-Guided Framework}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10275-10284} }
Patient-Level Anatomy Meets Scanning-Level Physics: Personalized Federated Low-Dose CT Denoising Empowered by Large Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Ziyuan and Chen, Yingyu and Wang, Zhiwen and Shan, Hongming and Chen, Yang and Zhang, Yi}, title = {Patient-Level Anatomy Meets Scanning-Level Physics: Personalized Federated Low-Dose CT Denoising Empowered by Large Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5154-5163} }
Exploiting Deblurring Networks for Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2025_CVPR, author = {Choi, Haeyun and Yang, Heemin and Han, Janghyeok and Cho, Sunghyun}, title = {Exploiting Deblurring Networks for Radiance Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6012-6021} }
Rethinking Lanes and Points in Complex Scenarios for Monocular 3D Lane Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chang_2025_CVPR, author = {Chang, Yifan and Huang, Junjie and Wang, Xiaofeng and Ye, Yun and Liang, Zhujin and Shan, Yi and Du, Dalong and Wang, Xingang}, title = {Rethinking Lanes and Points in Complex Scenarios for Monocular 3D Lane Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6802-6811} }
PSA-SSL: Pose and Size-aware Self-Supervised Learning on LiDAR Point Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Nisar_2025_CVPR, author = {Nisar, Barza and Waslander, Steven L.}, title = {PSA-SSL: Pose and Size-aware Self-Supervised Learning on LiDAR Point Clouds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6670-6679} }
SnapGen: Taming High-Resolution Text-to-Image Models for Mobile Devices with Efficient Architectures and Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Jierun and Hu, Dongting and Huang, Xijie and Coskun, Huseyin and Sahni, Arpit and Gupta, Aarush and Goyal, Anujraaj and Lahiri, Dishani and Singh, Rajesh and Idelbayev, Yerlan and Cao, Junli and Li, Yanyu and Cheng, Kwang-Ting and Chan, S.-H. Gary and Gong, Mingming and Tulyakov, Sergey and Kag, Anil and Xu, Yanwu and Ren, Jian}, title = {SnapGen: Taming High-Resolution Text-to-Image Models for Mobile Devices with Efficient Architectures and Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7997-8008} }
Community Forensics: Using Thousands of Generators to Train Fake Image Detectors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Jeongsoo and Owens, Andrew}, title = {Community Forensics: Using Thousands of Generators to Train Fake Image Detectors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8245-8257} }
ModeSeq: Taming Sparse Multimodal Motion Prediction with Sequential Mode Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Zikang and Zhou, Hengjian and Hu, Haibo and Wen, Zihao and Wang, Jianping and Li, Yung-Hui and Huang, Yu-Kai}, title = {ModeSeq: Taming Sparse Multimodal Motion Prediction with Sequential Mode Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1612-1621} }
Quaffure: Real-Time Quasi-Static Neural Hair Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stuyck_2025_CVPR, author = {Stuyck, Tuur and Lin, Gene Wei-Chin and Larionov, Egor and Chen, Hsiao-yu and Bozic, Aljaz and Sarafianos, Nikolaos and Roble, Doug}, title = {Quaffure: Real-Time Quasi-Static Neural Hair Simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {239-249} }
LumiNet: Latent Intrinsics Meets Diffusion Models for Indoor Scene Relighting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2025_CVPR, author = {Xing, Xiaoyan and Groh, Konrad and Karaoglu, Sezer and Gevers, Theo and Bhattad, Anand}, title = {LumiNet: Latent Intrinsics Meets Diffusion Models for Indoor Scene Relighting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {442-452} }
DiC: Rethinking Conv3x3 Designs in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Yuchuan and Han, Jing and Wang, Chengcheng and Liang, Yuchen and Xu, Chao and Chen, Hanting}, title = {DiC: Rethinking Conv3x3 Designs in Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2469-2478} }
MoSca: Dynamic Gaussian Fusion from Casual Videos via 4D Motion Scaffolds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2025_CVPR, author = {Lei, Jiahui and Weng, Yijia and Harley, Adam W. and Guibas, Leonidas and Daniilidis, Kostas}, title = {MoSca: Dynamic Gaussian Fusion from Casual Videos via 4D Motion Scaffolds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6165-6177} }
Not Just Text: Uncovering Vision Modality Typographic Threats in Image Generation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Hao and Xiao, Erjia and Yang, Jiayan and Cao, Jiahang and Zhang, Qiang and Zhang, Jize and Xu, Kaidi and Gu, Jindong and Xu, Renjing}, title = {Not Just Text: Uncovering Vision Modality Typographic Threats in Image Generation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2997-3007} }
TokenHSI: Unified Synthesis of Physical Human-Scene Interactions through Task Tokenization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2025_CVPR, author = {Pan, Liang and Yang, Zeshi and Dou, Zhiyang and Wang, Wenjia and Huang, Buzhen and Dai, Bo and Komura, Taku and Wang, Jingbo}, title = {TokenHSI: Unified Synthesis of Physical Human-Scene Interactions through Task Tokenization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5379-5391} }
ShapeShifter: 3D Variations Using Multiscale and Sparse Point-Voxel Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Maruani_2025_CVPR, author = {Maruani, Nissim and Yifan, Wang and Fisher, Matthew and Alliez, Pierre and Desbrun, Mathieu}, title = {ShapeShifter: 3D Variations Using Multiscale and Sparse Point-Voxel Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {605-617} }
FRESA: Feedforward Reconstruction of Personalized Skinned Avatars from Few Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Rong and Prada, Fabian and Wang, Ziyan and Jiang, Zhongshi and Yin, Chengxiang and Li, Junxuan and Saito, Shunsuke and Santesteban, Igor and Romero, Javier and Joshi, Rohan and Li, Hongdong and Saragih, Jason and Sheikh, Yaser}, title = {FRESA: Feedforward Reconstruction of Personalized Skinned Avatars from Few Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {281-291} }
ReasonGrounder: LVLM-Guided Hierarchical Feature Splatting for Open-Vocabulary 3D Visual Grounding and Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhenyang and Wang, Yikai and Zheng, Sixiao and Pan, Tongying and Liang, Longfei and Fu, Yanwei and Xue, Xiangyang}, title = {ReasonGrounder: LVLM-Guided Hierarchical Feature Splatting for Open-Vocabulary 3D Visual Grounding and Reasoning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3718-3727} }
WonderWorld: Interactive 3D Scene Generation from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Hong-Xing and Duan, Haoyi and Herrmann, Charles and Freeman, William T. and Wu, Jiajun}, title = {WonderWorld: Interactive 3D Scene Generation from a Single Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5916-5926} }
A Lightweight UDF Learning Framework for 3D Reconstruction Based on Local Shape Functions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Jiangbei and Li, Yanggeng and Hou, Fei and Hou, Junhui and Zhang, Zhebin and Wang, Shengfa and Lei, Na and He, Ying}, title = {A Lightweight UDF Learning Framework for 3D Reconstruction Based on Local Shape Functions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1297-1307} }
DiffCAM: Data-Driven Saliency Maps by Capturing Feature Differences-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xingjian and Zhao, Qiming and Bisht, Neelesh and Uddin, Mostofa Rafid and Kim, Jin Yu and Zhang, Bryan and Xu, Min}, title = {DiffCAM: Data-Driven Saliency Maps by Capturing Feature Differences}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10327-10337} }
From Sparse Signal to Smooth Motion: Real-Time Motion Generation with Rolling Prediction Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Barquero_2025_CVPR, author = {Barquero, German and Bertsch, Nadine and Marramreddy, Manojkumar and Chac\'on, Carlos and Arcadu, Filippo and Rigual, Ferran and He, Nicky Sijia and Palmero, Cristina and Escalera, Sergio and Ye, Yuting and Kips, Robin}, title = {From Sparse Signal to Smooth Motion: Real-Time Motion Generation with Rolling Prediction Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1850-1860} }
Imagine and Seek: Improving Composed Image Retrieval with an Imagined Proxy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, You and Ma, Fan and Yang, Yi}, title = {Imagine and Seek: Improving Composed Image Retrieval with an Imagined Proxy}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3984-3993} }
EMOVA: Empowering Language Models to See, Hear and Speak with Vivid Emotions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Kai and Gou, Yunhao and Huang, Runhui and Liu, Zhili and Tan, Daxin and Xu, Jing and Wang, Chunwei and Zhu, Yi and Zeng, Yihan and Yang, Kuo and Wang, Dingdong and Xiang, Kun and Li, Haoyuan and Bai, Haoli and Han, Jianhua and Li, Xiaohui and Jin, Weike and Xie, Nian and Zhang, Yu and Kwok, James T. and Zhao, Hengshuang and Liang, Xiaodan and Yeung, Dit-Yan and Chen, Xiao and Li, Zhenguo and Zhang, Wei and Liu, Qun and Hong, Lanqing and Hou, Lu and Xu, Hang}, title = {EMOVA: Empowering Language Models to See, Hear and Speak with Vivid Emotions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5455-5466} }
Reversing Flow for Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2025_CVPR, author = {Qin, Haina and Luo, Wenyang and Wang, Libin and Zheng, Dandan and Chen, Jingdong and Yang, Ming and Li, Bing and Hu, Weiming}, title = {Reversing Flow for Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7545-7558} }
Shadow Generation Using Diffusion Model with Geometry Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Haonan and Liu, Qingyang and Tao, Xinhao and Niu, Li and Zhai, Guangtao}, title = {Shadow Generation Using Diffusion Model with Geometry Prior}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7603-7612} }
Rethinking Epistemic and Aleatoric Uncertainty for Active Open-Set Annotation: An Energy-Based Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zong_2025_CVPR, author = {Zong, Chen-Chen and Huang, Sheng-Jun}, title = {Rethinking Epistemic and Aleatoric Uncertainty for Active Open-Set Annotation: An Energy-Based Approach}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10153-10162} }
Any3DIS: Class-Agnostic 3D Instance Segmentation by 2D Mask Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2025_CVPR, author = {Nguyen, Phuc and Luu, Minh and Tran, Anh and Pham, Cuong and Nguyen, Khoi}, title = {Any3DIS: Class-Agnostic 3D Instance Segmentation by 2D Mask Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3636-3645} }
FDS: Frequency-Aware Denoising Score for Text-Guided Latent Diffusion Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2025_CVPR, author = {Ren, Yufan and Jiang, Zicong and Zhang, Tong and Forchhammer, S{\o}ren and S\"usstrunk, Sabine}, title = {FDS: Frequency-Aware Denoising Score for Text-Guided Latent Diffusion Image Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2651-2660} }
MMAR: Towards Lossless Multi-Modal Auto-Regressive Probabilistic Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Jian and Yin, Dacheng and Zhou, Yizhou and Rao, Fengyun and Zhai, Wei and Cao, Yang and Zha, Zheng-Jun}, title = {MMAR: Towards Lossless Multi-Modal Auto-Regressive Probabilistic Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7974-7985} }
ROS-SAM: High-Quality Interactive Segmentation for Remote Sensing Moving Object-
[pdf]
[supp]
[bibtex]@InProceedings{Shan_2025_CVPR, author = {Shan, Zhe and Liu, Yang and Zhou, Lei and Yan, Cheng and Wang, Heng and Xie, Xia}, title = {ROS-SAM: High-Quality Interactive Segmentation for Remote Sensing Moving Object}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3625-3635} }
MI-DETR: An Object Detection Model with Multi-time Inquiries Mechanism-
[pdf]
[supp]
[bibtex]@InProceedings{Nan_2025_CVPR, author = {Nan, Zhixiong and Li, Xianghong and Dai, Jifeng and Xiang, Tao}, title = {MI-DETR: An Object Detection Model with Multi-time Inquiries Mechanism}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4703-4712} }
Synthetic Visual Genome-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2025_CVPR, author = {Park, Jae Sung and Ma, Zixian and Li, Linjie and Zheng, Chenhao and Hsieh, Cheng-Yu and Lu, Ximing and Chandu, Khyathi and Kong, Quan and Kobori, Norimasa and Farhadi, Ali and Choi, Yejin and Krishna, Ranjay}, title = {Synthetic Visual Genome}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9073-9086} }
Stop Learning it all to Mitigate Visual Hallucination, Focus on the Hallucination Target.-
[pdf]
[supp]
[bibtex]@InProceedings{Yoon_2025_CVPR, author = {Yoon, Dokyoon and Song, Youngsook and Park, Woomyoung}, title = {Stop Learning it all to Mitigate Visual Hallucination, Focus on the Hallucination Target.}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4200-4208} }
Seeing the Abstract: Translating the Abstract Language for Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Talon_2025_CVPR, author = {Talon, Davide and Girella, Federico and Liu, Ziyue and Cristani, Marco and Wang, Yiming}, title = {Seeing the Abstract: Translating the Abstract Language for Vision Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9253-9262} }
One-Step Event-Driven High-Speed Autofocus-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bao_2025_CVPR, author = {Bao, Yuhan and Gao, Shaohua and Li, Wenyong and Wang, Kaiwei}, title = {One-Step Event-Driven High-Speed Autofocus}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6222-6230} }
PanDA: Towards Panoramic Depth Anything with Unlabeled Panoramas and Mobius Spatial Augmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2025_CVPR, author = {Cao, Zidong and Zhu, Jinjing and Zhang, Weiming and Ai, Hao and Bai, Haotian and Zhao, Hengshuang and Wang, Lin}, title = {PanDA: Towards Panoramic Depth Anything with Unlabeled Panoramas and Mobius Spatial Augmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {982-992} }
Towards High-fidelity 3D Talking Avatar with Personalized Dynamic Texture-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Xuanchen and Wang, Jianyu and Cheng, Yuhao and Zeng, Yikun and Ren, Xingyu and Zhu, Wenhan and Zhao, Weiming and Yan, Yichao}, title = {Towards High-fidelity 3D Talking Avatar with Personalized Dynamic Texture}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {204-214} }
Scene Splatter: Momentum 3D Scene Generation from Single Image with Video Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Shengjun and Li, Jinzhao and Fei, Xin and Liu, Hao and Duan, Yueqi}, title = {Scene Splatter: Momentum 3D Scene Generation from Single Image with Video Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6089-6098} }
JiSAM: Alleviate Labeling Burden and Corner Case Problems in Autonomous Driving via Minimal Real-World Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Runjian and Shao, Wenqi and Zhang, Bo and Shi, Shaoshuai and Jiang, Li and Luo, Ping}, title = {JiSAM: Alleviate Labeling Burden and Corner Case Problems in Autonomous Driving via Minimal Real-World Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6792-6801} }
OSMamba: Omnidirectional Spectral Mamba with Dual-Domain Prior Generator for Exposure Correction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Gehui and Chen, Bin and Zhao, Chen and Zhang, Lei and Zhang, Jian}, title = {OSMamba: Omnidirectional Spectral Mamba with Dual-Domain Prior Generator for Exposure Correction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7480-7490} }
RandAR: Decoder-only Autoregressive Visual Generation in Random Orders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pang_2025_CVPR, author = {Pang, Ziqi and Zhang, Tianyuan and Luan, Fujun and Man, Yunze and Tan, Hao and Zhang, Kai and Freeman, William T. and Wang, Yu-Xiong}, title = {RandAR: Decoder-only Autoregressive Visual Generation in Random Orders}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {45-55} }
Vid2Sim: Realistic and Interactive Simulation from Video for Urban Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Ziyang and Liu, Zhizheng and Peng, Zhenghao and Wu, Wayne and Zhou, Bolei}, title = {Vid2Sim: Realistic and Interactive Simulation from Video for Urban Navigation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1581-1591} }
Type-R: Automatically Retouching Typos for Text-to-Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Shimoda_2025_CVPR, author = {Shimoda, Wataru and Inoue, Naoto and Haraguchi, Daichi and Mitani, Hayato and Uchida, Seiichi and Yamaguchi, Kota}, title = {Type-R: Automatically Retouching Typos for Text-to-Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2745-2754} }
Video-3D LLM: Learning Position-Aware Video Representation for 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Duo and Huang, Shijia and Wang, Liwei}, title = {Video-3D LLM: Learning Position-Aware Video Representation for 3D Scene Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8995-9006} }
Single Domain Generalization for Few-Shot Counting via Universal Representation Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Xianing and Huo, Si and Jiang, Borui and Hu, Hailin and Chen, Xinghao}, title = {Single Domain Generalization for Few-Shot Counting via Universal Representation Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4639-4649} }
Discovering Hidden Visual Concepts Beyond Linguistic Input in Infant Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ke_2025_CVPR, author = {Ke, Xueyi and Tsutsui, Satoshi and Zhang, Yayun and Wen, Bihan}, title = {Discovering Hidden Visual Concepts Beyond Linguistic Input in Infant Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4343-4352} }
Do We Always Need the Simplicity Bias? Looking for Optimal Inductive Biases in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Teney_2025_CVPR, author = {Teney, Damien and Jiang, Liangze and Gogianu, Florin and Abbasnejad, Ehsan}, title = {Do We Always Need the Simplicity Bias? Looking for Optimal Inductive Biases in the Wild}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {79-90} }
A General Adaptive Dual-level Weighting Mechanism for Remote Sensing Pansharpening-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2025_CVPR, author = {Huang, Jie and Chen, Haorui and Ren, Jiaxuan and Peng, Siran and Deng, Liangjian}, title = {A General Adaptive Dual-level Weighting Mechanism for Remote Sensing Pansharpening}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7447-7456} }
RASP: Revisiting 3D Anamorphic Art for Shadow-Guided Packing of Irregular Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Debnath_2025_CVPR, author = {Debnath, Soumyaratna and Tiwari, Ashish and Sadekar, Kaustubh and Raman, Shanmuganathan}, title = {RASP: Revisiting 3D Anamorphic Art for Shadow-Guided Packing of Irregular Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5849-5858} }
Diffusion Bridge: Leveraging Diffusion Model to Reduce the Modality Gap Between Text and Vision for Zero-Shot Image Captioning-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2025_CVPR, author = {Lee, Jeong Ryong and Shin, Yejee and Son, Geonhui and Hwang, Dosik}, title = {Diffusion Bridge: Leveraging Diffusion Model to Reduce the Modality Gap Between Text and Vision for Zero-Shot Image Captioning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4050-4059} }
MODfinity: Unsupervised Domain Adaptation with Multimodal Information Flow Intertwining-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Shanglin and Lv, Jianming and Kang, Jingdan and Zhang, Huaidong and Liang, Zequan and He, Shengfeng}, title = {MODfinity: Unsupervised Domain Adaptation with Multimodal Information Flow Intertwining}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5092-5101} }
Towards Universal Soccer Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rao_2025_CVPR, author = {Rao, Jiayuan and Wu, Haoning and Jiang, Hao and Zhang, Ya and Wang, Yanfeng and Xie, Weidi}, title = {Towards Universal Soccer Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8384-8394} }
Enhancing Privacy-Utility Trade-offs to Mitigate Memorization in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Chen and Liu, Daochang and Shah, Mubarak and Xu, Chang}, title = {Enhancing Privacy-Utility Trade-offs to Mitigate Memorization in Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8182-8191} }
NVComposer: Boosting Generative Novel View Synthesis with Multiple Sparse and Unposed Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Lingen and Zhang, Zhaoyang and Li, Yaowei and Xu, Jiale and Hu, Wenbo and Li, Xiaoyu and Cheng, Weihao and Gu, Jinwei and Xue, Tianfan and Shan, Ying}, title = {NVComposer: Boosting Generative Novel View Synthesis with Multiple Sparse and Unposed Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {777-787} }
Efficient Personalization of Quantized Diffusion Model without Backpropagation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2025_CVPR, author = {Seo, Hoigi and Jeong, Wongi and Lee, Kyungryeol and Chun, Se Young}, title = {Efficient Personalization of Quantized Diffusion Model without Backpropagation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7717-7727} }
Alias-Free Latent Diffusion Models: Improving Fractional Shift Equivariance of Diffusion Latent Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Yifan and Xiao, Zeqi and Yang, Shuai and Pan, Xingang}, title = {Alias-Free Latent Diffusion Models: Improving Fractional Shift Equivariance of Diffusion Latent Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {34-44} }
KVQ: Boosting Video Quality Assessment via Saliency-guided Local Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2025_CVPR, author = {Qu, Yunpeng and Yuan, Kun and Xie, Qizhi and Sun, Ming and Zhou, Chao and Wang, Jian}, title = {KVQ: Boosting Video Quality Assessment via Saliency-guided Local Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2150-2160} }
Learning Flow Fields in Attention for Controllable Person Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Zijian and Liu, Shikun and Han, Xiao and Liu, Haozhe and Ng, Kam Woh and Xie, Tian and Cong, Yuren and Li, Hang and Xu, Mengmeng and Perez-Rua, Juan-Manuel and Patel, Aditya and Xiang, Tao and Shi, Miaojing and He, Sen}, title = {Learning Flow Fields in Attention for Controllable Person Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2491-2501} }
Early-Bird Diffusion: Investigating and Leveraging Timestep-Aware Early-Bird Tickets in Diffusion Models for Efficient Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Whalen_2025_CVPR, author = {Whalen, Lexington and Du, Zhenbang and You, Haoran and Li, Chaojian and Li, Sixu and Lin, Yingyan}, title = {Early-Bird Diffusion: Investigating and Leveraging Timestep-Aware Early-Bird Tickets in Diffusion Models for Efficient Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7675-7684} }
DIV-FF: Dynamic Image-Video Feature Fields For Environment Understanding in Egocentric Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Mur-Labadia_2025_CVPR, author = {Mur-Labadia, Lorenzo and Guerrero, Josechu and Martinez-Cantin, Ruben}, title = {DIV-FF: Dynamic Image-Video Feature Fields For Environment Understanding in Egocentric Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3470-3480} }
EfficientLLaVA: Generalizable Auto-Pruning for Large Vision-language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2025_CVPR, author = {Liang, Yinan and Wang, Ziwei and Xu, Xiuwei and Zhou, Jie and Lu, Jiwen}, title = {EfficientLLaVA: Generalizable Auto-Pruning for Large Vision-language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9445-9454} }
OnlineAnySeg: Online Zero-Shot 3D Segmentation by Visual Foundation Model Guided 2D Mask Merging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Yijie and Zhang, Jiazhao and Lan, Yuqing and Guo, Yulan and Dong, Dezun and Zhu, Chenyang and Xu, Kai}, title = {OnlineAnySeg: Online Zero-Shot 3D Segmentation by Visual Foundation Model Guided 2D Mask Merging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3676-3685} }
Tora: Trajectory-oriented Diffusion Transformer for Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zhenghao and Liao, Junchao and Li, Menghao and Dai, ZuoZhuo and Qiu, Bingxue and Zhu, Siyu and Qin, Long and Wang, Weizhi}, title = {Tora: Trajectory-oriented Diffusion Transformer for Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2063-2073} }
Morpheus: Text-Driven 3D Gaussian Splat Shape and Color Stylization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wynn_2025_CVPR, author = {Wynn, Jamie and Qureshi, Zawar and Powierza, Jakub and Watson, Jamie and Sayed, Mohamed}, title = {Morpheus: Text-Driven 3D Gaussian Splat Shape and Color Stylization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7825-7836} }
Generalized Diffusion Detector: Mining Robust Features from Diffusion Models for Domain-Generalized Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Boyong and Ji, Yuxiang and Ye, Qianwen and Tan, Zhuoyue and Wu, Liaoni}, title = {Generalized Diffusion Detector: Mining Robust Features from Diffusion Models for Domain-Generalized Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9921-9932} }
Enhancing Dance-to-Music Generation via Negative Conditioning Latent Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Changchang and Liu, Gaowen and Fleming, Charles and Yan, Yan}, title = {Enhancing Dance-to-Music Generation via Negative Conditioning Latent Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8321-8330} }
Neuro-Symbolic Evaluation of Text-to-Video Models using Formal Verification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sharan_2025_CVPR, author = {Sharan, S P and Choi, Minkyu and Shah, Sahil and Goel, Harsh and Omama, Mohammad and Chinchali, Sandeep}, title = {Neuro-Symbolic Evaluation of Text-to-Video Models using Formal Verification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8395-8405} }
Spherical Manifold Guided Diffusion Model for Panoramic Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2025_CVPR, author = {Sun, Xiancheng and Xu, Mai and Li, Shengxi and Ma, Senmao and Deng, Xin and Jiang, Lai and Shen, Gang}, title = {Spherical Manifold Guided Diffusion Model for Panoramic Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5824-5834} }
Rethinking Query-based Transformer for Continual Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Yuchen and Shi, Cheng and Wang, Dingyou and Tang, Jiajin and Wei, Zhengxuan and Wu, Yu and Li, Guanbin and Yang, Sibei}, title = {Rethinking Query-based Transformer for Continual Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4595-4606} }
SlideChat: A Large Vision-Language Assistant for Whole-Slide Pathology Image Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Ying and Wang, Guoan and Ji, Yuanfeng and Li, Yanjun and Ye, Jin and Li, Tianbin and Hu, Ming and Yu, Rongshan and Qiao, Yu and He, Junjun}, title = {SlideChat: A Large Vision-Language Assistant for Whole-Slide Pathology Image Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5134-5143} }
Text-guided Sparse Voxel Pruning for Efficient 3D Visual Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Wenxuan and Xu, Xiuwei and Wang, Ziwei and Feng, Jianjiang and Zhou, Jie and Lu, Jiwen}, title = {Text-guided Sparse Voxel Pruning for Efficient 3D Visual Grounding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3666-3675} }
Common3D: Self-Supervised Learning of 3D Morphable Models for Common Objects in Neural Feature Space-
[pdf]
[supp]
[bibtex]@InProceedings{Sommer_2025_CVPR, author = {Sommer, Leonhard and D{\"u}nkel, Olaf and Theobalt, Christian and Kortylewski, Adam}, title = {Common3D: Self-Supervised Learning of 3D Morphable Models for Common Objects in Neural Feature Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6468-6479} }
ECVC: Exploiting Non-Local Correlations in Multiple Frames for Contextual Video Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Wei and Li, Junru and Zhang, Kai and Zhang, Li}, title = {ECVC: Exploiting Non-Local Correlations in Multiple Frames for Contextual Video Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7331-7341} }
LinGen: Towards High-Resolution Minute-Length Text-to-Video Generation with Linear Computational Complexity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Hongjie and Ma, Chih-Yao and Liu, Yen-Cheng and Hou, Ji and Xu, Tao and Wang, Jialiang and Juefei-Xu, Felix and Luo, Yaqiao and Zhang, Peizhao and Hou, Tingbo and Vajda, Peter and Jha, Niraj K. and Dai, Xiaoliang}, title = {LinGen: Towards High-Resolution Minute-Length Text-to-Video Generation with Linear Computational Complexity}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2578-2588} }
Towards Open-Vocabulary Audio-Visual Event Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Jinxing and Guo, Dan and Guo, Ruohao and Mao, Yuxin and Hu, Jingjing and Zhong, Yiran and Chang, Xiaojun and Wang, Meng}, title = {Towards Open-Vocabulary Audio-Visual Event Localization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8362-8371} }
S2Gaussian: Sparse-View Super-Resolution 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wan_2025_CVPR, author = {Wan, Yecong and Shao, Mingwen and Cheng, Yuanshuo and Zuo, Wangmeng}, title = {S2Gaussian: Sparse-View Super-Resolution 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {711-721} }
HIIF: Hierarchical Encoding based Implicit Image Function for Continuous Super-resolution-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jiang_2025_CVPR, author = {Jiang, Yuxuan and Kwan, Ho Man and Peng, Tianhao and Gao, Ge and Zhang, Fan and Zhu, Xiaoqing and Sole, Joel and Bull, David}, title = {HIIF: Hierarchical Encoding based Implicit Image Function for Continuous Super-resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2289-2299} }
Motion Prompting: Controlling Video Generation with Motion Trajectories-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Geng_2025_CVPR, author = {Geng, Daniel and Herrmann, Charles and Hur, Junhwa and Cole, Forrester and Zhang, Serena and Pfaff, Tobias and Lopez-Guevara, Tatiana and Aytar, Yusuf and Rubinstein, Michael and Sun, Chen and Wang, Oliver and Owens, Andrew and Sun, Deqing}, title = {Motion Prompting: Controlling Video Generation with Motion Trajectories}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1-12} }
VERA: Explainable Video Anomaly Detection via Verbalized Learning of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Muchao and Liu, Weiyang and He, Pan}, title = {VERA: Explainable Video Anomaly Detection via Verbalized Learning of Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8679-8688} }
CCIN: Compositional Conflict Identification and Neutralization for Composed Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2025_CVPR, author = {Tian, Likai and Zhao, Jian and Hu, Zechao and Yang, Zhengwei and Li, Hao and Jin, Lei and Wang, Zheng and Li, Xuelong}, title = {CCIN: Compositional Conflict Identification and Neutralization for Composed Image Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3974-3983} }
OverLoCK: An Overview-first-Look-Closely-next ConvNet with Context-Mixing Dynamic Kernels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lou_2025_CVPR, author = {Lou, Meng and Yu, Yizhou}, title = {OverLoCK: An Overview-first-Look-Closely-next ConvNet with Context-Mixing Dynamic Kernels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {128-138} }
Flash3D: Super-scaling Point Transformers through Joint Hardware-Geometry Locality-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2025_CVPR, author = {Chen, Liyan and Meyer, Gregory P. and Zhang, Zaiwei and Wolff, Eric M. and Vernaza, Paul}, title = {Flash3D: Super-scaling Point Transformers through Joint Hardware-Geometry Locality}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6595-6604} }
Comprehensive Relighting: Generalizable and Consistent Monocular Human Relighting and Harmonization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Junying and Liu, Jingyuan and Sun, Xin and Singh, Krishna Kumar and Shu, Zhixin and Zhang, He and Yang, Jimei and Zhao, Nanxuan and Wang, Tuanfeng Y. and Chen, Simon S. and Neumann, Ulrich and Yoon, Jae Shin}, title = {Comprehensive Relighting: Generalizable and Consistent Monocular Human Relighting and Harmonization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {380-390} }
MAtCha Gaussians: Atlas of Charts for High-Quality Geometry and Photorealism From Sparse Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guedon_2025_CVPR, author = {Guedon, Antoine and Ichikawa, Tomoki and Yamashita, Kohei and Nishino, Ko}, title = {MAtCha Gaussians: Atlas of Charts for High-Quality Geometry and Photorealism From Sparse Views}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6001-6011} }
Extreme Rotation Estimation in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bezalel_2025_CVPR, author = {Bezalel, Hana and Ankri, Dotan and Cai, Ruojin and Averbuch-Elor, Hadar}, title = {Extreme Rotation Estimation in the Wild}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1061-1070} }
Traversing Distortion-Perception Tradeoff using a Single Score-Based Generative Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Yuhan and Bi, Suzhi and Zhang, Ying-Jun Angela and Yuan, Xiaojun}, title = {Traversing Distortion-Perception Tradeoff using a Single Score-Based Generative Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2377-2386} }
Task-Agnostic Guided Feature Expansion for Class-Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Bowen and Zhou, Da-Wei and Ye, Han-Jia and Zhan, De-Chuan}, title = {Task-Agnostic Guided Feature Expansion for Class-Incremental Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10099-10109} }
Let's Chorus: Partner-aware Hybrid Song-Driven 3D Head Animation-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Xiumei and Huang, Zikai and Xu, Wenhao and Xiao, Peng and Xu, Xuemiao and Zhang, Huaidong}, title = {Let's Chorus: Partner-aware Hybrid Song-Driven 3D Head Animation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5467-5476} }
Twinner: Shining Light on Digital Twins in a Few Snaps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zarzar_2025_CVPR, author = {Zarzar, Jesus and Monnier, Tom and Shapovalov, Roman and Vedaldi, Andrea and Novotny, David}, title = {Twinner: Shining Light on Digital Twins in a Few Snaps}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5859-5869} }
MonoPlace3D: Learning 3D-Aware Object Placement for 3D Monocular Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parihar_2025_CVPR, author = {Parihar, Rishubh and Sarkar, Srinjay and Vora, Sarthak and Kundu, Jogendra Nath and Babu, R. Venkatesh}, title = {MonoPlace3D: Learning 3D-Aware Object Placement for 3D Monocular Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6531-6541} }
SGCR: Spherical Gaussians for Efficient 3D Curve Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2025_CVPR, author = {Yang, Xinran and Ji, Donghao and Li, Yuanqi and Guo, Jie and Guo, Yanwen and Xie, Junyuan}, title = {SGCR: Spherical Gaussians for Efficient 3D Curve Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5793-5803} }
Advancing Semantic Future Prediction through Multimodal Visual Sequence Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Karypidis_2025_CVPR, author = {Karypidis, Efstathios and Kakogeorgiou, Ioannis and Gidaris, Spyros and Komodakis, Nikos}, title = {Advancing Semantic Future Prediction through Multimodal Visual Sequence Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3793-3803} }
On the Out-Of-Distribution Generalization of Large Multimodal Models-
[pdf]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xingxuan and Li, Jiansheng and Chu, Wenjing and Hai, Junjia and Xu, Renzhe and Yang, Yuqing and Guan, Shikai and Xu, Jiazheng and Jing, Liping and Cui, Peng}, title = {On the Out-Of-Distribution Generalization of Large Multimodal Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10315-10326} }
Enhanced Contrastive Learning with Multi-view Longitudinal Data for Chest X-ray Report Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Kang and Ma, Zhuoqi and Kang, Xiaolu and Li, Yunan and Xie, Kun and Jiao, Zhicheng and Miao, Qiguang}, title = {Enhanced Contrastive Learning with Multi-view Longitudinal Data for Chest X-ray Report Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10348-10359} }
Scaling Inference Time Compute for Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Nanye and Tong, Shangyuan and Jia, Haolin and Hu, Hexiang and Su, Yu-Chuan and Zhang, Mingda and Yang, Xuan and Li, Yandong and Jaakkola, Tommi and Jia, Xuhui and Xie, Saining}, title = {Scaling Inference Time Compute for Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2523-2534} }
Chat-based Person Retrieval via Dialogue-Refined Cross-Modal Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2025_CVPR, author = {Bai, Yang and Ji, Yucheng and Cao, Min and Wang, Jinqiao and Ye, Mang}, title = {Chat-based Person Retrieval via Dialogue-Refined Cross-Modal Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3952-3962} }
AVF-MAE++: Scaling Affective Video Facial Masked Autoencoders via Efficient Audio-Visual Self-Supervised Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Xuecheng and Sun, Heli and Wang, Yifan and Nie, Jiayu and Zhang, Jie and Wang, Yabing and Xue, Junxiao and He, Liang}, title = {AVF-MAE++: Scaling Affective Video Facial Masked Autoencoders via Efficient Audio-Visual Self-Supervised Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9142-9153} }
DiffusionSfM: Predicting Structure and Motion via Ray Origin and Endpoint Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2025_CVPR, author = {Zhao, Qitao and Lin, Amy and Tan, Jeff and Zhang, Jason Y. and Ramanan, Deva and Tulsiani, Shubham}, title = {DiffusionSfM: Predicting Structure and Motion via Ray Origin and Endpoint Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6317-6326} }
GroundingFace: Fine-grained Face Understanding via Pixel Grounding Multimodal Large Language Model-
[pdf]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Yue and Zhang, Jiangning and Zhu, Junwei and Hou, Runze and Ji, Xiaozhong and Lin, Chuming and Hu, Xiaobin and Xue, Zhucun and Liu, Yong}, title = {GroundingFace: Fine-grained Face Understanding via Pixel Grounding Multimodal Large Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3942-3951} }
DynFocus: Dynamic Cooperative Network Empowers LLMs with Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Yudong and Guo, Qingpei and Pan, Liyuan and Liu, Liu and Guan, Yu and Yang, Ming}, title = {DynFocus: Dynamic Cooperative Network Empowers LLMs with Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8512-8522} }
V-Stylist: Video Stylization via Collaboration and Reflection of MLLM Agents-
[pdf]
[supp]
[bibtex]@InProceedings{Yue_2025_CVPR, author = {Yue, Zhengrong and Zhuang, Shaobin and Li, Kunchang and Ding, Yanbo and Wang, Yali}, title = {V-Stylist: Video Stylization via Collaboration and Reflection of MLLM Agents}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3195-3205} }
ID-Patch: Robust ID Association for Group Photo Personalization-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yimeng and Zhi, Tiancheng and Liu, Jing and Sang, Shen and Jiang, Liming and Yan, Qing and Liu, Sijia and Luo, Linjie}, title = {ID-Patch: Robust ID Association for Group Photo Personalization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2986-2996} }
iG-6DoF: Model-free 6DoF Pose Estimation for Unseen Object via Iterative 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2025_CVPR, author = {Cao, Tuo and Luo, Fei and Qin, Jiongming and Jiang, Yu and Wang, Yusen and Xiao, Chunxia}, title = {iG-6DoF: Model-free 6DoF Pose Estimation for Unseen Object via Iterative 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6436-6446} }
ForestLPR: LiDAR Place Recognition in Forests Attentioning Multiple BEV Density Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Yanqing and Tuna, Turcan and Hutter, Marco and Cadena, Cesar and Zheng, Nanning}, title = {ForestLPR: LiDAR Place Recognition in Forests Attentioning Multiple BEV Density Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6659-6669} }
AdaMMS: Model Merging for Heterogeneous Multimodal Large Language Models with Unsupervised Coefficient Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2025_CVPR, author = {Du, Yiyang and Wang, Xiaochen and Chen, Chi and Ye, Jiabo and Wang, Yiru and Li, Peng and Yan, Ming and Zhang, Ji and Huang, Fei and Sui, Zhifang and Sun, Maosong and Liu, Yang}, title = {AdaMMS: Model Merging for Heterogeneous Multimodal Large Language Models with Unsupervised Coefficient Optimization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9413-9422} }
SLVR: Super-Light Visual Reconstruction via Blueprint Controllable Convolutions and Exploring Feature Diversity Representation-
[pdf]
[bibtex]@InProceedings{Ni_2025_CVPR, author = {Ni, Ning and Zhang, Libao}, title = {SLVR: Super-Light Visual Reconstruction via Blueprint Controllable Convolutions and Exploring Feature Diversity Representation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {400-410} }
Layered Image Vectorization via Semantic Simplification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2025_CVPR, author = {Wang, Zhenyu and Huang, Jianxi and Sun, Zhida and Gong, Yuanhao and Cohen-Or, Daniel and Lu, Min}, title = {Layered Image Vectorization via Semantic Simplification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7728-7738} }
Hearing Anywhere in Any Environment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Xiulong and Kumar, Anurag and Calamia, Paul and Amengual, Sebastia V. and Murdock, Calvin and Ananthabhotla, Ishwarya and Robinson, Philip and Shlizerman, Eli and Ithapu, Vamsi Krishna and Gao, Ruohan}, title = {Hearing Anywhere in Any Environment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5732-5741} }
Automated Proof of Polynomial Inequalities via Reinforcement Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Banglong and Qi, Niuniu and Zeng, Xia and Dehbi, Lydia and Yang, Zhengfeng}, title = {Automated Proof of Polynomial Inequalities via Reinforcement Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5052-5060} }
SAM-I2V: Upgrading SAM to Support Promptable Video Segmentation with Less than 0.2% Training Cost-
[pdf]
[supp]
[bibtex]@InProceedings{Mei_2025_CVPR, author = {Mei, Haiyang and Zhang, Pengyu and Shou, Mike Zheng}, title = {SAM-I2V: Upgrading SAM to Support Promptable Video Segmentation with Less than 0.2\% Training Cost}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3417-3426} }
How Do I Do That? Synthesizing 3D Hand Motion and Contacts for Everyday Interactions-
[pdf]
[arXiv]
[bibtex]@InProceedings{Prakash_2025_CVPR, author = {Prakash, Aditya and Lundell, Benjamin and Andreychuk, Dmitry and Forsyth, David and Gupta, Saurabh and Sawhney, Harpreet}, title = {How Do I Do That? Synthesizing 3D Hand Motion and Contacts for Everyday Interactions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7026-7036} }
Joint Vision-Language Social Bias Removal for CLIP-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Haoyu and Guo, Yangyang and Kankanhalli, Mohan}, title = {Joint Vision-Language Social Bias Removal for CLIP}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4246-4255} }
MV-DUSt3R+: Single-Stage Scene Reconstruction from Sparse Views In 2 Seconds-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2025_CVPR, author = {Tang, Zhenggang and Fan, Yuchen and Wang, Dilin and Xu, Hongyu and Ranjan, Rakesh and Schwing, Alexander and Yan, Zhicheng}, title = {MV-DUSt3R+: Single-Stage Scene Reconstruction from Sparse Views In 2 Seconds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5283-5293} }
Explicit Depth-Aware Blurry Video Frame Interpolation Guided by Differential Curves-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2025_CVPR, author = {Yan, Zaoming and Lei, Pengcheng and Wang, Tingting and Fang, Faming and Zhang, Junkang and Huang, Yaomin and Song, Haichuan}, title = {Explicit Depth-Aware Blurry Video Frame Interpolation Guided by Differential Curves}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1994-2004} }
MonSter: Marry Monodepth to Stereo Unleashes Power-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cheng_2025_CVPR, author = {Cheng, Junda and Liu, Longliang and Xu, Gangwei and Wang, Xianqi and Zhang, Zhaoxing and Deng, Yong and Zang, Jinliang and Chen, Yurui and Cai, Zhipeng and Yang, Xin}, title = {MonSter: Marry Monodepth to Stereo Unleashes Power}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6273-6282} }
A Tale of Two Classes: Adapting Supervised Contrastive Learning to Binary Imbalanced Datasets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mildenberger_2025_CVPR, author = {Mildenberger, David and Hager, Paul and Rueckert, Daniel and Menten, Martin J.}, title = {A Tale of Two Classes: Adapting Supervised Contrastive Learning to Binary Imbalanced Datasets}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10305-10314} }
Learning Class Prototypes for Unified Sparse-Supervised 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2025_CVPR, author = {Zhu, Yun and Hui, Le and Yang, Hang and Qian, Jianjun and Xie, Jin and Yang, Jian}, title = {Learning Class Prototypes for Unified Sparse-Supervised 3D Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9911-9920} }
Open-World Amodal Appearance Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ao_2025_CVPR, author = {Ao, Jiayang and Jiang, Yanbei and Ke, Qiuhong and Ehinger, Krista A.}, title = {Open-World Amodal Appearance Completion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6490-6499} }
RivuletMLP: An MLP-based Architecture for Efficient Compressed Video Quality Enhancement-
[pdf]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Gang and Wang, Weiran and Quan, Guancheng and Wang, Shihao and Zhou, Dajiang and Li, Yunsong}, title = {RivuletMLP: An MLP-based Architecture for Efficient Compressed Video Quality Enhancement}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7342-7352} }
Reanimating Images using Neural Representations of Dynamic Stimuli-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yeung_2025_CVPR, author = {Yeung, Jacob and Luo, Andrew F. and Sarch, Gabriel and Henderson, Margaret M. and Ramanan, Deva and Tarr, Michael J.}, title = {Reanimating Images using Neural Representations of Dynamic Stimuli}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5331-5343} }
DepthCrafter: Generating Consistent Long Depth Sequences for Open-world Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Wenbo and Gao, Xiangjun and Li, Xiaoyu and Zhao, Sijie and Cun, Xiaodong and Zhang, Yong and Quan, Long and Shan, Ying}, title = {DepthCrafter: Generating Consistent Long Depth Sequences for Open-world Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2005-2015} }
Rethinking Vision-Language Model in Face Forensics: Multi-Modal Interpretable Forged Face Detector-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2025_CVPR, author = {Guo, Xiao and Song, Xiufeng and Zhang, Yue and Liu, Xiaohong and Liu, Xiaoming}, title = {Rethinking Vision-Language Model in Face Forensics: Multi-Modal Interpretable Forged Face Detector}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {105-116} }
Active Hyperspectral Imaging Using an Event Camera-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2025_CVPR, author = {Yu, Bohan and Liang, Jinxiu and Wang, Zhuofeng and Fan, Bin and Subpa-asa, Art and Shi, Boxin and Sato, Imari}, title = {Active Hyperspectral Imaging Using an Event Camera}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {929-939} }
Bridging the Gap between Gaussian Diffusion Models and Universal Quantization for Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Relic_2025_CVPR, author = {Relic, Lucas and Azevedo, Roberto and Zhang, Yang and Gross, Markus and Schroers, Christopher}, title = {Bridging the Gap between Gaussian Diffusion Models and Universal Quantization for Image Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2449-2458} }
SAMBLE: Shape-Specific Point Cloud Sampling for an Optimal Trade-Off Between Local Detail and Global Uniformity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Chengzhi and Wan, Yuxin and Fu, Hao and Pfrommer, Julius and Zhong, Zeyun and Zheng, Junwei and Zhang, Jiaming and Beyerer, J{\"u}rgen}, title = {SAMBLE: Shape-Specific Point Cloud Sampling for an Optimal Trade-Off Between Local Detail and Global Uniformity}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1342-1352} }
Driving by the Rules: A Benchmark for Integrating Traffic Sign Regulations into Vectorized HD Map-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chang_2025_CVPR, author = {Chang, Xinyuan and Xue, Maixuan and Liu, Xinran and Pan, Zheng and Wei, Xing}, title = {Driving by the Rules: A Benchmark for Integrating Traffic Sign Regulations into Vectorized HD Map}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6823-6833} }
S4-Driver: Scalable Self-Supervised Driving Multimodal Large Language Model with Spatio-Temporal Visual Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Yichen and Xu, Runsheng and He, Tong and Hwang, Jyh-Jing and Luo, Katie and Ji, Jingwei and Lin, Hubert and Chen, Letian and Lu, Yiren and Leng, Zhaoqi and Anguelov, Dragomir and Tan, Mingxing}, title = {S4-Driver: Scalable Self-Supervised Driving Multimodal Large Language Model with Spatio-Temporal Visual Representation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1622-1632} }
Science-T2I: Addressing Scientific Illusions in Image Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2025_CVPR, author = {Li, Jialuo and Chai, Wenhao and Fu, Xingyu and Xu, Haiyang and Xie, Saining}, title = {Science-T2I: Addressing Scientific Illusions in Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2734-2744} }
MoST: Efficient Monarch Sparse Tuning for 3D Representation Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Han_2025_CVPR, author = {Han, Xu and Tang, Yuan and Xu, Jinfeng and Li, Xianzhi}, title = {MoST: Efficient Monarch Sparse Tuning for 3D Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6584-6594} }
Re-thinking Temporal Search for Long-Form Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2025_CVPR, author = {Ye, Jinhui and Wang, Zihan and Sun, Haosen and Chandrasegaran, Keshigeyan and Durante, Zane and Eyzaguirre, Cristobal and Bisk, Yonatan and Niebles, Juan Carlos and Adeli, Ehsan and Fei-Fei, Li and Wu, Jiajun and Li, Manling}, title = {Re-thinking Temporal Search for Long-Form Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8579-8591} }
When Domain Generalization meets Generalized Category Discovery: An Adaptive Task-Arithmetic Driven Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rathore_2025_CVPR, author = {Rathore, Vaibhav and B, Shubhranil and Dutta, Saikat and Mehrotra, Sarthak and Kira, Zsolt and Banerjee, Biplab}, title = {When Domain Generalization meets Generalized Category Discovery: An Adaptive Task-Arithmetic Driven Approach}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4905-4915} }
BIP3D: Bridging 2D Images and 3D Perception for Embodied Intelligence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Xuewu and Lin, Tianwei and Huang, Lichao and Xie, Hongyu and Su, Zhizhong}, title = {BIP3D: Bridging 2D Images and 3D Perception for Embodied Intelligence}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9007-9016} }
Query Efficient Black-Box Visual Prompting with Subspace Learning-
[pdf]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Zhaogeng and Zhang, Haozhen and Zhang, Hualin and Li, Xingchen and Shi, Wanli and Gu, Bin and Chang, Yi}, title = {Query Efficient Black-Box Visual Prompting with Subspace Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4322-4331} }
Improving Autoregressive Visual Generation with Cluster-Oriented Token Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2025_CVPR, author = {Hu, Teng and Zhang, Jiangning and Yi, Ran and Weng, Jieyu and Wang, Yabiao and Zeng, Xianfang and Xue, Zhucun and Ma, Lizhuang}, title = {Improving Autoregressive Visual Generation with Cluster-Oriented Token Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9351-9360} }
Dual-view X-ray Detection: Can AI Detect Prohibited Items from Dual-view X-ray Images like Humans?-
[pdf]
[bibtex]@InProceedings{Tao_2025_CVPR, author = {Tao, Renshuai and Wang, Haoyu and Guo, Yuzhe and Chen, Hairong and Zhang, Li and Liu, Xianglong and Wei, Yunchao and Zhao, Yao}, title = {Dual-view X-ray Detection: Can AI Detect Prohibited Items from Dual-view X-ray Images like Humans?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10338-10347} }
Solving Instance Detection from an Open-World Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2025_CVPR, author = {Shen, Qianqian and Zhao, Yunhan and Kwon, Nahyun and Kim, Jeeeun and Li, Yanan and Kong, Shu}, title = {Solving Instance Detection from an Open-World Perspective}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9901-9910} }
Percept, Memory, and Imagine: World Feature Simulating for Open-Domain Unknown Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2025_CVPR, author = {Wu, Aming and Deng, Cheng}, title = {Percept, Memory, and Imagine: World Feature Simulating for Open-Domain Unknown Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4682-4691} }
Efficient Depth Estimation for Unstable Stereo Camera Systems on AR Glasses-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Yongfan and Kwon, Hyoukjun}, title = {Efficient Depth Estimation for Unstable Stereo Camera Systems on AR Glasses}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6252-6261} }
LiDAR-RT: Gaussian-based Ray Tracing for Dynamic LiDAR Re-simulation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2025_CVPR, author = {Zhou, Chenxu and Fu, Lvchang and Peng, Sida and Yan, Yunzhi and Zhang, Zhanhua and Chen, Yong and Xia, Jiazhi and Zhou, Xiaowei}, title = {LiDAR-RT: Gaussian-based Ray Tracing for Dynamic LiDAR Re-simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1538-1548} }
Large-Scale Text-to-Image Model with Inpainting is a Zero-Shot Subject-Driven Image Generator-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shin_2025_CVPR, author = {Shin, Chaehun and Choi, Jooyoung and Kim, Heeseung and Yoon, Sungroh}, title = {Large-Scale Text-to-Image Model with Inpainting is a Zero-Shot Subject-Driven Image Generator}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7986-7996} }
Flow-NeRF: Joint Learning of Geometry, Poses, and Dense Flow within Unified Neural Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2025_CVPR, author = {Zheng, Xunzhi and Xu, Dan}, title = {Flow-NeRF: Joint Learning of Geometry, Poses, and Dense Flow within Unified Neural Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {993-1002} }
Consistent and Controllable Image Animation with Motion Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Xin and Wang, Yaohui and Jia, Gengyun and Chen, Xinyuan and Wong, Tien-Tsin and Li, Yuan-Fang and Chen, Cunjian}, title = {Consistent and Controllable Image Animation with Motion Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7288-7298} }
AA-CLIP: Enhancing Zero-Shot Anomaly Detection via Anomaly-Aware CLIP-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2025_CVPR, author = {Ma, Wenxin and Zhang, Xu and Yao, Qingsong and Tang, Fenghe and Wu, Chenxu and Li, Yingtai and Yan, Rui and Jiang, Zihang and Zhou, S. Kevin}, title = {AA-CLIP: Enhancing Zero-Shot Anomaly Detection via Anomaly-Aware CLIP}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4744-4754} }
HybridGS: Decoupling Transients and Statics with 2D and 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2025_CVPR, author = {Lin, Jingyu and Gu, Jiaqi and Fan, Lubin and Wu, Bojian and Lou, Yujing and Chen, Renjie and Liu, Ligang and Ye, Jieping}, title = {HybridGS: Decoupling Transients and Statics with 2D and 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {788-797} }
Channel Consistency Prior and Self-Reconstruction Strategy Based Unsupervised Image Deraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2025_CVPR, author = {Dong, Guanglu and Zheng, Tianheng and Cao, Yuanzhouhan and Qing, Linbo and Ren, Chao}, title = {Channel Consistency Prior and Self-Reconstruction Strategy Based Unsupervised Image Deraining}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7469-7479} }
MobileMamba: Lightweight Multi-Receptive Visual Mamba Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2025_CVPR, author = {He, Haoyang and Zhang, Jiangning and Cai, Yuxuan and Chen, Hongxu and Hu, Xiaobin and Gan, Zhenye and Wang, Yabiao and Wang, Chengjie and Wu, Yunsheng and Xie, Lei}, title = {MobileMamba: Lightweight Multi-Receptive Visual Mamba Network}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4497-4507} }
SimLTD: Simple Supervised and Semi-Supervised Long-Tailed Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tran_2025_CVPR, author = {Tran, Phi Vu}, title = {SimLTD: Simple Supervised and Semi-Supervised Long-Tailed Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4672-4681} }
HyperSeg: Hybrid Segmentation Assistant with Fine-grained Visual Perceiver-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2025_CVPR, author = {Wei, Cong and Zhong, Yujie and Tan, Haoxian and Liu, Yong and Hu, Jie and Li, Dengjie and Zhao, Zheng and Yang, Yujiu}, title = {HyperSeg: Hybrid Segmentation Assistant with Fine-grained Visual Perceiver}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8931-8941} }
Diffusion-based Event Generation for High-Quality Image Deblurring-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2025_CVPR, author = {Xie, Xinan and Zhang, Qing and Zheng, Wei-Shi}, title = {Diffusion-based Event Generation for High-Quality Image Deblurring}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2194-2203} }
Balanced Rate-Distortion Optimization in Learned Image Compression-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yichi and Duan, Zhihao and Huang, Yuning and Zhu, Fengqing}, title = {Balanced Rate-Distortion Optimization in Learned Image Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2428-2438} }
Bridge the Gap: From Weak to Full Supervision for Temporal Action Localization with PseudoFormer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2025_CVPR, author = {Liu, Ziyi and Liu, Yangcen}, title = {Bridge the Gap: From Weak to Full Supervision for Temporal Action Localization with PseudoFormer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8711-8720} }
Back