CVPR 2025 Open Access Repository

Papers

Back
Deterministic Image-to-Image Translation via Denoising Brownian Bridge Models with Dual Approximators: Bohan Xiao,

Peiyong Wang,

Qisheng He,

Ming Dong; [pdf]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Bohan and Wang, Peiyong and He, Qisheng and Dong, Ming},
  title     = {Deterministic Image-to-Image Translation via Denoising {Brownian} Bridge Models with Dual Approximators},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28232--28241},
}
Towards Source-Free Machine Unlearning: Sk Miraj Ahmed,

Umit Yigit Basaran,

Dripta S. Raychaudhuri,

Arindam Dutta,

Rohit Kundu,

Fahim Faisal Niloy,

Basak Guler,

Amit K. Roy-Chowdhury; [pdf] [supp]
[bibtex]
@InProceedings{Ahmed_2025_CVPR,
  author    = {Ahmed, Sk Miraj and Basaran, Umit Yigit and Raychaudhuri, Dripta S. and Dutta, Arindam and Kundu, Rohit and Niloy, Fahim Faisal and Guler, Basak and Roy-Chowdhury, Amit K.},
  title     = {Towards Source-Free Machine Unlearning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4948--4957},
}
Uni4D: Unifying Visual Foundation Models for 4D Modeling from a Single Video: David Yifan Yao,

Albert J. Zhai,

Shenlong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2025_CVPR,
  author    = {Yao, David Yifan and Zhai, Albert J. and Wang, Shenlong},
  title     = {{Uni4D}: Unifying Visual Foundation Models for {4D} Modeling from a Single Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1116--1126},
}
DynScene: Scalable Generation of Dynamic Robotic Manipulation Scenes for Embodied AI: Sangmin Lee,

Sungyong Park,

Heewon Kim; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Sangmin and Park, Sungyong and Kim, Heewon},
  title     = {{DynScene}: Scalable Generation of Dynamic Robotic Manipulation Scenes for Embodied {AI}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12166--12175},
}
DiffLocks: Generating 3D Hair from a Single Image using Diffusion Models: Radu Alexandru Rosu,

Keyu Wu,

Yao Feng,

Youyi Zheng,

Michael J. Black; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rosu_2025_CVPR,
  author    = {Rosu, Radu Alexandru and Wu, Keyu and Feng, Yao and Zheng, Youyi and Black, Michael J.},
  title     = {{DiffLocks}: Generating {3D} Hair from a Single Image using Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10847--10857},
}
Hyperbolic Category Discovery: Yuanpei Liu,

Zhenqi He,

Kai Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yuanpei and He, Zhenqi and Han, Kai},
  title     = {Hyperbolic Category Discovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9891--9900},
}
The Language of Motion: Unifying Verbal and Non-verbal Language of 3D Human Motion: Changan Chen,

Juze Zhang,

Shrinidhi K. Lakshmikanth,

Yusu Fang,

Ruizhi Shao,

Gordon Wetzstein,

Li Fei-Fei,

Ehsan Adeli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Changan and Zhang, Juze and Lakshmikanth, Shrinidhi K. and Fang, Yusu and Shao, Ruizhi and Wetzstein, Gordon and Fei-Fei, Li and Adeli, Ehsan},
  title     = {The Language of Motion: Unifying Verbal and Non-verbal Language of {3D} Human Motion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6200--6211},
}
CALICO: Part-Focused Semantic Co-Segmentation with Large Vision-Language Models: Kiet A. Nguyen,

Adheesh Juvekar,

Tianjiao Yu,

Muntasir Wahed,

Ismini Lourentzou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Kiet A. and Juvekar, Adheesh and Yu, Tianjiao and Wahed, Muntasir and Lourentzou, Ismini},
  title     = {{CALICO}: Part-Focused Semantic Co-Segmentation with Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4550--4561},
}
Task Preference Optimization: Improving Multimodal Large Language Models with Vision Task Alignment: Ziang Yan,

Zhilin Li,

Yinan He,

Chenting Wang,

Kunchang Li,

Xinhao Li,

Xiangyu Zeng,

Zilei Wang,

Yali Wang,

Yu Qiao,

Limin Wang,

Yi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Ziang and Li, Zhilin and He, Yinan and Wang, Chenting and Li, Kunchang and Li, Xinhao and Zeng, Xiangyu and Wang, Zilei and Wang, Yali and Qiao, Yu and Wang, Limin and Wang, Yi},
  title     = {Task Preference Optimization: Improving Multimodal Large Language Models with Vision Task Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29880--29892},
}
Cross-modal Causal Relation Alignment for Video Question Grounding: Weixing Chen,

Yang Liu,

Binglin Chen,

Jiandong Su,

Yongsen Zheng,

Liang Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Weixing and Liu, Yang and Chen, Binglin and Su, Jiandong and Zheng, Yongsen and Lin, Liang},
  title     = {Cross-modal Causal Relation Alignment for Video Question Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24087--24096},
}
Words or Vision: Do Vision-Language Models Have Blind Faith in Text?: Ailin Deng,

Tri Cao,

Zhirui Chen,

Bryan Hooi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Ailin and Cao, Tri and Chen, Zhirui and Hooi, Bryan},
  title     = {Words or Vision: Do Vision-Language Models Have Blind Faith in Text?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3867--3876},
}
Diffusion Renderer: Neural Inverse and Forward Rendering with Video Diffusion Models: Ruofan Liang,

Zan Gojcic,

Huan Ling,

Jacob Munkberg,

Jon Hasselgren,

Chih-Hao Lin,

Jun Gao,

Alexander Keller,

Nandita Vijaykumar,

Sanja Fidler,

Zian Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Ruofan and Gojcic, Zan and Ling, Huan and Munkberg, Jacob and Hasselgren, Jon and Lin, Chih-Hao and Gao, Jun and Keller, Alexander and Vijaykumar, Nandita and Fidler, Sanja and Wang, Zian},
  title     = {Diffusion Renderer: Neural Inverse and Forward Rendering with Video Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26069--26080},
}
Harnessing Frequency Spectrum Insights for Image Copyright Protection Against Diffusion Models: Zhenguang Liu,

Chao Shuai,

Shaojing Fan,

Ziping Dong,

Jinwu Hu,

Zhongjie Ba,

Kui Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhenguang and Shuai, Chao and Fan, Shaojing and Dong, Ziping and Hu, Jinwu and Ba, Zhongjie and Ren, Kui},
  title     = {Harnessing Frequency Spectrum Insights for Image Copyright Protection Against Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18653--18662},
}
Learning to Detect Objects from Multi-Agent LiDAR Scans without Manual Labels: Qiming Xia,

Wenkai Lin,

Haoen Xiang,

Xun Huang,

Siheng Chen,

Zhen Dong,

Cheng Wang,

Chenglu Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_CVPR,
  author    = {Xia, Qiming and Lin, Wenkai and Xiang, Haoen and Huang, Xun and Chen, Siheng and Dong, Zhen and Wang, Cheng and Wen, Chenglu},
  title     = {Learning to Detect Objects from Multi-Agent {LiDAR} Scans without Manual Labels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1418--1428},
}
DeepLA-Net: Very Deep Local Aggregation Networks for Point Cloud Analysis: Ziyin Zeng,

Mingyue Dong,

Jian Zhou,

Huan Qiu,

Zhen Dong,

Man Luo,

Bijun Li; [pdf] [supp]
[bibtex]
@InProceedings{Zeng_2025_CVPR,
  author    = {Zeng, Ziyin and Dong, Mingyue and Zhou, Jian and Qiu, Huan and Dong, Zhen and Luo, Man and Li, Bijun},
  title     = {{DeepLA-Net}: Very Deep Local Aggregation Networks for Point Cloud Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1330--1341},
}
Multi-Layer Visual Feature Fusion in Multimodal LLMs: Methods, Analysis, and Best Practices: Junyan Lin,

Haoran Chen,

Yue Fan,

Yingqi Fan,

Xin Jin,

Hui Su,

Jinlan Fu,

Xiaoyu Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Junyan and Chen, Haoran and Fan, Yue and Fan, Yingqi and Jin, Xin and Su, Hui and Fu, Jinlan and Shen, Xiaoyu},
  title     = {Multi-Layer Visual Feature Fusion in Multimodal {LLMs}: Methods, Analysis, and Best Practices},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4156--4166},
}
APHQ-ViT: Post-Training Quantization with Average Perturbation Hessian Based Reconstruction for Vision Transformers: Zhuguanyu Wu,

Jiayi Zhang,

Jiaxin Chen,

Jinyang Guo,

Di Huang,

Yunhong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Zhuguanyu and Zhang, Jiayi and Chen, Jiaxin and Guo, Jinyang and Huang, Di and Wang, Yunhong},
  title     = {{APHQ-ViT}: Post-Training Quantization with Average Perturbation {Hessian} Based Reconstruction for Vision Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9686--9695},
}
AdaptCMVC: Robust Adaption to Incremental Views in Continual Multi-view Clustering: Jing Wang,

Songhe Feng,

Kristoffer Knutsen Wickstrøm,

Michael C. Kampffmeyer; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jing and Feng, Songhe and Wickstr{\o}m, Kristoffer Knutsen and Kampffmeyer, Michael C.},
  title     = {{AdaptCMVC}: Robust Adaption to Incremental Views in Continual Multi-view Clustering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10285--10294},
}
Omni-Scene: Omni-Gaussian Representation for Ego-Centric Sparse-View Scene Reconstruction: Dongxu Wei,

Zhiqi Li,

Peidong Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Dongxu and Li, Zhiqi and Liu, Peidong},
  title     = {{Omni-Scene}: {Omni-Gaussian} Representation for Ego-Centric Sparse-View Scene Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22317--22327},
}
3DTopia-XL: Scaling High-quality 3D Asset Generation via Primitive Diffusion: Zhaoxi Chen,

Jiaxiang Tang,

Yuhao Dong,

Ziang Cao,

Fangzhou Hong,

Yushi Lan,

Tengfei Wang,

Haozhe Xie,

Tong Wu,

Shunsuke Saito,

Liang Pan,

Dahua Lin,

Ziwei Liu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zhaoxi and Tang, Jiaxiang and Dong, Yuhao and Cao, Ziang and Hong, Fangzhou and Lan, Yushi and Wang, Tengfei and Xie, Haozhe and Wu, Tong and Saito, Shunsuke and Pan, Liang and Lin, Dahua and Liu, Ziwei},
  title     = {{3DTopia-XL}: Scaling High-quality {3D} Asset Generation via Primitive Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26576--26586},
}
UA-Pose: Uncertainty-Aware 6D Object Pose Estimation and Online Object Completion with Partial References: Ming-Feng Li,

Xin Yang,

Fu-En Wang,

Hritam Basak,

Yuyin Sun,

Shreekant Gayaka,

Min Sun,

Cheng-Hao Kuo; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Ming-Feng and Yang, Xin and Wang, Fu-En and Basak, Hritam and Sun, Yuyin and Gayaka, Shreekant and Sun, Min and Kuo, Cheng-Hao},
  title     = {{UA-Pose}: Uncertainty-Aware {6D} Object Pose Estimation and Online Object Completion with Partial References},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1180--1189},
}
Missing Target-Relevant Information Prediction with World Model for Accurate Zero-Shot Composed Image Retrieval: Yuanmin Tang,

Jing Yu,

Keke Gai,

Jiamin Zhuang,

Gang Xiong,

Gaopeng Gou,

Qi Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Yuanmin and Yu, Jing and Gai, Keke and Zhuang, Jiamin and Xiong, Gang and Gou, Gaopeng and Wu, Qi},
  title     = {Missing Target-Relevant Information Prediction with World Model for Accurate Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24785--24795},
}
Binarized Mamba-Transformer for Lightweight Quad Bayer HybridEVS Demosaicing: Shiyang Zhou,

Haijin Zeng,

Yunfan Lu,

Tong Shao,

Ke Tang,

Yongyong Chen,

Jie Liu,

Jingyong Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Shiyang and Zeng, Haijin and Lu, Yunfan and Shao, Tong and Tang, Ke and Chen, Yongyong and Liu, Jie and Su, Jingyong},
  title     = {Binarized {Mamba-Transformer} for Lightweight Quad {Bayer} {HybridEVS} Demosaicing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8817--8827},
}
DiffSensei: Bridging Multi-Modal LLMs and Diffusion Models for Customized Manga Generation: Jianzong Wu,

Chao Tang,

Jingbo Wang,

Yanhong Zeng,

Xiangtai Li,

Yunhai Tong; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Jianzong and Tang, Chao and Wang, Jingbo and Zeng, Yanhong and Li, Xiangtai and Tong, Yunhai},
  title     = {{DiffSensei}: Bridging Multi-Modal {LLMs} and Diffusion Models for Customized Manga Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28684--28693},
}
Narrating the Video: Boosting Text-Video Retrieval via Comprehensive Utilization of Frame-Level Captions: Chan Hur,

Jeong-hun Hong,

Dong-hun Lee,

Dabin Kang,

Semin Myeong,

Sang-hyo Park,

Hyeyoung Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hur_2025_CVPR,
  author    = {Hur, Chan and Hong, Jeong-hun and Lee, Dong-hun and Kang, Dabin and Myeong, Semin and Park, Sang-hyo and Park, Hyeyoung},
  title     = {Narrating the Video: Boosting Text-Video Retrieval via Comprehensive Utilization of Frame-Level Captions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24077--24086},
}
IDEA-Bench: How Far are Generative Models from Professional Designing?: Chen Liang,

Lianghua Huang,

Jingwu Fang,

Huanzhang Dou,

Wei Wang,

Zhi-Fan Wu,

Yupeng Shi,

Junge Zhang,

Xin Zhao,

Yu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Chen and Huang, Lianghua and Fang, Jingwu and Dou, Huanzhang and Wang, Wei and Wu, Zhi-Fan and Shi, Yupeng and Zhang, Junge and Zhao, Xin and Liu, Yu},
  title     = {{IDEA-Bench}: How Far are Generative Models from Professional Designing?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18541--18551},
}
Interpretable Image Classification via Non-parametric Part Prototype Learning: Zhijie Zhu,

Lei Fan,

Maurice Pagnucco,

Yang Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Zhijie and Fan, Lei and Pagnucco, Maurice and Song, Yang},
  title     = {Interpretable Image Classification via Non-parametric Part Prototype Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9762--9771},
}
PhD: A ChatGPT-Prompted Visual Hallucination Evaluation Dataset: Jiazhen Liu,

Yuhan Fu,

Ruobing Xie,

Runquan Xie,

Xingwu Sun,

Fengzong Lian,

Zhanhui Kang,

Xirong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jiazhen and Fu, Yuhan and Xie, Ruobing and Xie, Runquan and Sun, Xingwu and Lian, Fengzong and Kang, Zhanhui and Li, Xirong},
  title     = {{PhD}: A {ChatGPT-Prompted} Visual Hallucination Evaluation Dataset},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19857--19866},
}
CARL: A Framework for Equivariant Image Registration: Hastings Greer,

Lin Tian,

François-Xavier Vialard,

Roland Kwitt,

Raul San Jose Estepar,

Marc Niethammer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Greer_2025_CVPR,
  author    = {Greer, Hastings and Tian, Lin and Vialard, Fran{\c{c}}ois-Xavier and Kwitt, Roland and Estepar, Raul San Jose and Niethammer, Marc},
  title     = {{CARL}: A Framework for Equivariant Image Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26014--26023},
}
ClimbingCap: Multi-Modal Dataset and Method for Rock Climbing in World Coordinate: Ming Yan,

Xincheng Lin,

Yuhua Luo,

Shuqi Fan,

Yudi Dai,

Qixin Zhong,

Lincai Zhong,

Yuexin Ma,

Lan Xu,

Chenglu Wen,

Siqi Shen,

Cheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Ming and Lin, Xincheng and Luo, Yuhua and Fan, Shuqi and Dai, Yudi and Zhong, Qixin and Zhong, Lincai and Ma, Yuexin and Xu, Lan and Wen, Chenglu and Shen, Siqi and Wang, Cheng},
  title     = {{ClimbingCap}: Multi-Modal Dataset and Method for Rock Climbing in World Coordinate},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12312--12323},
}
DAGSM: Disentangled Avatar Generation with GS-enhanced Mesh: Jingyu Zhuang,

Di Kang,

Linchao Bao,

Liang Lin,

Guanbin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuang_2025_CVPR,
  author    = {Zhuang, Jingyu and Kang, Di and Bao, Linchao and Lin, Liang and Li, Guanbin},
  title     = {{DAGSM}: Disentangled Avatar Generation with {GS-enhanced} Mesh},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {292--303},
}
Estimating Body and Hand Motion in an Ego-sensed World: Brent Yi,

Vickie Ye,

Maya Zheng,

Yunqi Li,

Lea Müller,

Georgios Pavlakos,

Yi Ma,

Jitendra Malik,

Angjoo Kanazawa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yi_2025_CVPR,
  author    = {Yi, Brent and Ye, Vickie and Zheng, Maya and Li, Yunqi and M{\"u}ller, Lea and Pavlakos, Georgios and Ma, Yi and Malik, Jitendra and Kanazawa, Angjoo},
  title     = {Estimating Body and Hand Motion in an Ego-sensed World},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7072--7084},
}
A Bias-Free Training Paradigm for More General AI-generated Image Detection: Fabrizio Guillaro,

Giada Zingarini,

Ben Usman,

Avneesh Sud,

Davide Cozzolino,

Luisa Verdoliva; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guillaro_2025_CVPR,
  author    = {Guillaro, Fabrizio and Zingarini, Giada and Usman, Ben and Sud, Avneesh and Cozzolino, Davide and Verdoliva, Luisa},
  title     = {A Bias-Free Training Paradigm for More General {AI-generated} Image Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18685--18694},
}
FALCON: Fairness Learning via Contrastive Attention Approach to Continual Semantic Scene Understanding: Thanh-Dat Truong,

Utsav Prabhu,

Bhiksha Raj,

Jackson Cothren,

Khoa Luu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Truong_2025_CVPR,
  author    = {Truong, Thanh-Dat and Prabhu, Utsav and Raj, Bhiksha and Cothren, Jackson and Luu, Khoa},
  title     = {{FALCON}: Fairness Learning via Contrastive Attention Approach to Continual Semantic Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15065--15075},
}
Certified Human Trajectory Prediction: Mohammadhossein Bahari,

Saeed Saadatnejad,

Amirhossein Askari Farsangi,

Seyed-Mohsen Moosavi-Dezfooli,

Alexandre Alahi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bahari_2025_CVPR,
  author    = {Bahari, Mohammadhossein and Saadatnejad, Saeed and Farsangi, Amirhossein Askari and Moosavi-Dezfooli, Seyed-Mohsen and Alahi, Alexandre},
  title     = {Certified Human Trajectory Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12301--12311},
}
Evaluating Vision-Language Models as Evaluators in Path Planning: Mohamed Aghzal,

Xiang Yue,

Erion Plaku,

Ziyu Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Aghzal_2025_CVPR,
  author    = {Aghzal, Mohamed and Yue, Xiang and Plaku, Erion and Yao, Ziyu},
  title     = {Evaluating Vision-Language Models as Evaluators in Path Planning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6886--6897},
}
Free on the Fly: Enhancing Flexibility in Test-Time Adaptation with Online EM: Qiyuan Dai,

Sibei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2025_CVPR,
  author    = {Dai, Qiyuan and Yang, Sibei},
  title     = {Free on the Fly: Enhancing Flexibility in Test-Time Adaptation with Online {EM}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9538--9548},
}
Transformers without Normalization: Jiachen Zhu,

Xinlei Chen,

Kaiming He,

Yann LeCun,

Zhuang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Jiachen and Chen, Xinlei and He, Kaiming and LeCun, Yann and Liu, Zhuang},
  title     = {Transformers without Normalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14901--14911},
}
SGC-Net: Stratified Granular Comparison Network for Open-Vocabulary HOI Detection: Xin Lin,

Chong Shi,

Zuopeng Yang,

Haojin Tang,

Zhili Zhou; [pdf]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Xin and Shi, Chong and Yang, Zuopeng and Tang, Haojin and Zhou, Zhili},
  title     = {{SGC-Net}: Stratified Granular Comparison Network for Open-Vocabulary {HOI} Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4539--4549},
}
Galaxy Walker: Geometry-aware VLMs For Galaxy-scale Understanding: Tianyu Chen,

Xingcheng Fu,

Yisen Gao,

Haodong Qian,

Yuecen Wei,

Kun Yan,

Haoyi Zhou,

Jianxin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Tianyu and Fu, Xingcheng and Gao, Yisen and Qian, Haodong and Wei, Yuecen and Yan, Kun and Zhou, Haoyi and Li, Jianxin},
  title     = {Galaxy Walker: Geometry-aware {VLMs} For Galaxy-scale Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4112--4121},
}
HiPART: Hierarchical Pose AutoRegressive Transformer for Occluded 3D Human Pose Estimation: Hongwei Zheng,

Han Li,

Wenrui Dai,

Ziyang Zheng,

Chenglin Li,

Junni Zou,

Hongkai Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Hongwei and Li, Han and Dai, Wenrui and Zheng, Ziyang and Li, Chenglin and Zou, Junni and Xiong, Hongkai},
  title     = {{HiPART}: Hierarchical Pose {AutoRegressive} Transformer for Occluded {3D} Human Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16807--16817},
}
SnowMaster: Comprehensive Real-world Image Desnowing via MLLM with Multi-Model Feedback Optimization: Jianyu Lai,

Sixiang Chen,

Yunlong Lin,

Tian Ye,

Yun Liu,

Song Fei,

Zhaohu Xing,

Hongtao Wu,

Weiming Wang,

Lei Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Jianyu and Chen, Sixiang and Lin, Yunlong and Ye, Tian and Liu, Yun and Fei, Song and Xing, Zhaohu and Wu, Hongtao and Wang, Weiming and Zhu, Lei},
  title     = {{SnowMaster}: Comprehensive Real-world Image Desnowing via {MLLM} with Multi-Model Feedback Optimization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4302--4312},
}
From Faces to Voices: Learning Hierarchical Representations for High-quality Video-to-Speech: Ji-Hoon Kim,

Jeongsoo Choi,

Jaehun Kim,

Chaeyoung Jung,

Joon Son Chung; [pdf] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Ji-Hoon and Choi, Jeongsoo and Kim, Jaehun and Jung, Chaeyoung and Chung, Joon Son},
  title     = {From Faces to Voices: Learning Hierarchical Representations for High-quality Video-to-Speech},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15874--15884},
}
DFM: Differentiable Feature Matching for Anomaly Detection: Sheng Wu,

Yimi Wang,

Xudong Liu,

Yuguang Yang,

Runqi Wang,

Guodong Guo,

David Doermann,

Baochang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Sheng and Wang, Yimi and Liu, Xudong and Yang, Yuguang and Wang, Runqi and Guo, Guodong and Doermann, David and Zhang, Baochang},
  title     = {{DFM}: Differentiable Feature Matching for Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15224--15233},
}
FlashGS: Efficient 3D Gaussian Splatting for Large-scale and High-resolution Rendering: Guofeng Feng,

Siyan Chen,

Rong Fu,

Zimu Liao,

Yi Wang,

Tao Liu,

Boni Hu,

Linning Xu,

Zhilin Pei,

Hengjie Li,

Xiuhong Li,

Ninghui Sun,

Xingcheng Zhang,

Bo Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Guofeng and Chen, Siyan and Fu, Rong and Liao, Zimu and Wang, Yi and Liu, Tao and Hu, Boni and Xu, Linning and Pei, Zhilin and Li, Hengjie and Li, Xiuhong and Sun, Ninghui and Zhang, Xingcheng and Dai, Bo},
  title     = {{FlashGS}: Efficient {3D} {Gaussian} Splatting for Large-scale and High-resolution Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26652--26662},
}
PointSR: Self-Regularized Point Supervision for Drone-View Object Detection: Weizhuo Li,

Yue Xi,

Wenjing Jia,

Zehao Zhang,

Fei Li,

Xiangzeng Liu,

Qiguang Miao; [pdf]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Weizhuo and Xi, Yue and Jia, Wenjing and Zhang, Zehao and Li, Fei and Liu, Xiangzeng and Miao, Qiguang},
  title     = {{PointSR}: Self-Regularized Point Supervision for Drone-View Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11707--11716},
}
Exploring Timeline Control for Facial Motion Generation: Yifeng Ma,

Jinwei Qi,

Chaonan Ji,

Peng Zhang,

Bang Zhang,

Zhidong Deng,

Liefeng Bo; [pdf] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Yifeng and Qi, Jinwei and Ji, Chaonan and Zhang, Peng and Zhang, Bang and Deng, Zhidong and Bo, Liefeng},
  title     = {Exploring Timeline Control for Facial Motion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1940--1950},
}
v-CLR: View-Consistent Learning for Open-World Instance Segmentation: Chang-Bin Zhang,

Jinhong Ni,

Yujie Zhong,

Kai Han; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Chang-Bin and Ni, Jinhong and Zhong, Yujie and Han, Kai},
  title     = {{v-CLR}: View-Consistent Learning for Open-World Instance Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20307--20317},
}
Chat2SVG: Vector Graphics Generation with Large Language Models and Image Diffusion Models: Ronghuan Wu,

Wanchao Su,

Jing Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Ronghuan and Su, Wanchao and Liao, Jing},
  title     = {{Chat2SVG}: Vector Graphics Generation with Large Language Models and Image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23690--23700},
}
GAF: Gaussian Avatar Reconstruction from Monocular Videos via Multi-view Diffusion: Jiapeng Tang,

Davide Davoli,

Tobias Kirschstein,

Liam Schoneveld,

Matthias Nießner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Jiapeng and Davoli, Davide and Kirschstein, Tobias and Schoneveld, Liam and Nie{\ss}ner, Matthias},
  title     = {{GAF}: {Gaussian} Avatar Reconstruction from Monocular Videos via Multi-view Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5546--5558},
}
Reloc3r: Large-Scale Training of Relative Camera Pose Regression for Generalizable, Fast, and Accurate Visual Localization: Siyan Dong,

Shuzhe Wang,

Shaohui Liu,

Lulu Cai,

Qingnan Fan,

Juho Kannala,

Yanchao Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Siyan and Wang, Shuzhe and Liu, Shaohui and Cai, Lulu and Fan, Qingnan and Kannala, Juho and Yang, Yanchao},
  title     = {Reloc3r: Large-Scale Training of Relative Camera Pose Regression for Generalizable, Fast, and Accurate Visual Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16739--16752},
}
AI-Face: A Million-Scale Demographically Annotated AI-Generated Face Dataset and Fairness Benchmark: Li Lin,

Santosh Santosh,

Mingyang Wu,

Xin Wang,

Shu Hu; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Li and Santosh, Santosh and Wu, Mingyang and Wang, Xin and Hu, Shu},
  title     = {{AI-Face}: A Million-Scale Demographically Annotated {AI-Generated} Face Dataset and Fairness Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3503--3515},
}
Inference-Scale Complexity in ANN-SNN Conversion for High-Performance and Low-Power Applications: Tong Bu,

Maohua Li,

Zhaofei Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bu_2025_CVPR,
  author    = {Bu, Tong and Li, Maohua and Yu, Zhaofei},
  title     = {Inference-Scale Complexity in {ANN-SNN} Conversion for High-Performance and Low-Power Applications},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24387--24397},
}
Janus: Decoupling Visual Encoding for Unified Multimodal Understanding and Generation: Chengyue Wu,

Xiaokang Chen,

Zhiyu Wu,

Yiyang Ma,

Xingchao Liu,

Zizheng Pan,

Wen Liu,

Zhenda Xie,

Xingkai Yu,

Chong Ruan,

Ping Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Chengyue and Chen, Xiaokang and Wu, Zhiyu and Ma, Yiyang and Liu, Xingchao and Pan, Zizheng and Liu, Wen and Xie, Zhenda and Yu, Xingkai and Ruan, Chong and Luo, Ping},
  title     = {Janus: Decoupling Visual Encoding for Unified Multimodal Understanding and Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12966--12977},
}
MVDoppler-Pose: Multi-Modal Multi-View mmWave Sensing for Long-Distance Self-Occluded Human Walking Pose Estimation: Jaeho Choi,

Soheil Hor,

Shubo Yang,

Amin Arbabian; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2025_CVPR,
  author    = {Choi, Jaeho and Hor, Soheil and Yang, Shubo and Arbabian, Amin},
  title     = {{MVDoppler-Pose}: Multi-Modal Multi-View {mmWave} Sensing for Long-Distance Self-Occluded Human Walking Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27750--27759}
}
TopNet: Transformer-Efficient Occupancy Prediction Network for Octree-Structured Point Cloud Geometry Compression: Xinjie Wang,

Yifan Zhang,

Ting Liu,

Xinpu Liu,

Ke Xu,

Jianwei Wan,

Yulan Guo,

Hanyun Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xinjie and Zhang, Yifan and Liu, Ting and Liu, Xinpu and Xu, Ke and Wan, Jianwei and Guo, Yulan and Wang, Hanyun},
  title     = {{TopNet}: Transformer-Efficient Occupancy Prediction Network for Octree-Structured Point Cloud Geometry Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27305--27314}
}
MagicArticulate: Make Your 3D Models Articulation-Ready: Chaoyue Song,

Jianfeng Zhang,

Xiu Li,

Fan Yang,

Yiwen Chen,

Zhongcong Xu,

Jun Hao Liew,

Xiaoyang Guo,

Fayao Liu,

Jiashi Feng,

Guosheng Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_CVPR,
  author    = {Song, Chaoyue and Zhang, Jianfeng and Li, Xiu and Yang, Fan and Chen, Yiwen and Xu, Zhongcong and Liew, Jun Hao and Guo, Xiaoyang and Liu, Fayao and Feng, Jiashi and Lin, Guosheng},
  title     = {{MagicArticulate}: Make Your {3D} Models Articulation-Ready},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15998--16007}
}
Gain from Neighbors: Boosting Model Robustness in the Wild via Adversarial Perturbations Toward Neighboring Classes: Zhou Yang,

Mingtao Feng,

Tao Huang,

Fangfang Wu,

Weisheng Dong,

Xin Li,

Guangming Shi; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Zhou and Feng, Mingtao and Huang, Tao and Wu, Fangfang and Dong, Weisheng and Li, Xin and Shi, Guangming},
  title     = {Gain from Neighbors: Boosting Model Robustness in the Wild via Adversarial Perturbations Toward Neighboring Classes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25497--25507}
}
Enhancing Video-LLM Reasoning via Agent-of-Thoughts Distillation: Yudi Shi,

Shangzhe Di,

Qirui Chen,

Weidi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Yudi and Di, Shangzhe and Chen, Qirui and Xie, Weidi},
  title     = {Enhancing {Video-LLM} Reasoning via Agent-of-Thoughts Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8523--8533}
}
De^2Gaze: Deformable and Decoupled Representation Learning for 3D Gaze Estimation: Yunfeng Xiao,

Xiaowei Bai,

Baojun Chen,

Hao Su,

Hao He,

Liang Xie,

Erwei Yin; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Yunfeng and Bai, Xiaowei and Chen, Baojun and Su, Hao and He, Hao and Xie, Liang and Yin, Erwei},
  title     = {{De$^2$Gaze}: Deformable and Decoupled Representation Learning for {3D} Gaze Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3091--3100}
}
ReCapture: Generative Video Camera Controls for User-Provided Videos using Masked Video Fine-Tuning: David Junhao Zhang,

Roni Paiss,

Shiran Zada,

Nikhil Karnad,

David E. Jacobs,

Yael Pritch,

Inbar Mosseri,

Mike Zheng Shou,

Neal Wadhwa,

Nataniel Ruiz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, David Junhao and Paiss, Roni and Zada, Shiran and Karnad, Nikhil and Jacobs, David E. and Pritch, Yael and Mosseri, Inbar and Shou, Mike Zheng and Wadhwa, Neal and Ruiz, Nataniel},
  title     = {{ReCapture}: Generative Video Camera Controls for User-Provided Videos using Masked Video Fine-Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2050--2062}
}
M^3-VOS: Multi-Phase, Multi-Transition, and Multi-Scenery Video Object Segmentation: Zixuan Chen,

Jiaxin Li,

Junxuan Liang,

Liming Tan,

Yejie Guo,

Cewu Lu,

Yong-Lu Li; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zixuan and Li, Jiaxin and Liang, Junxuan and Tan, Liming and Guo, Yejie and Lu, Cewu and Li, Yong-Lu},
  title     = {{M$^3$-VOS}: Multi-Phase, Multi-Transition, and Multi-Scenery Video Object Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29193--29202}
}
Self-Expansion of Pre-trained Models with Mixture of Adapters for Continual Learning: Huiyi Wang,

Haodong Lu,

Lina Yao,

Dong Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Huiyi and Lu, Haodong and Yao, Lina and Gong, Dong},
  title     = {Self-Expansion of Pre-trained Models with Mixture of Adapters for Continual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10087--10098}
}
Dual Prompting Image Restoration with Diffusion Transformers: Dehong Kong,

Fan Li,

Zhixin Wang,

Jiaqi Xu,

Renjing Pei,

Wenbo Li,

WenQi Ren; [pdf] [arXiv]
[bibtex]
@InProceedings{Kong_2025_CVPR,
  author    = {Kong, Dehong and Li, Fan and Wang, Zhixin and Xu, Jiaqi and Pei, Renjing and Li, Wenbo and Ren, WenQi},
  title     = {Dual Prompting Image Restoration with Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12809--12819}
}
Brain-Inspired Spiking Neural Networks for Energy-Efficient Object Detection: Ziqi Li,

Tao Gao,

Yisheng An,

Ting Chen,

Jing Zhang,

Yuanbo Wen,

Mengkun Liu,

Qianxi Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Ziqi and Gao, Tao and An, Yisheng and Chen, Ting and Zhang, Jing and Wen, Yuanbo and Liu, Mengkun and Zhang, Qianxi},
  title     = {Brain-Inspired Spiking Neural Networks for Energy-Efficient Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3552--3562}
}
Medusa: A Multi-Scale High-order Contrastive Dual-Diffusion Approach for Multi-View Clustering: Liang Chen,

Zhe Xue,

Yawen Li,

Meiyu Liang,

Yan Wang,

Anton van den Hengel,

Yuankai Qi; [pdf]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Liang and Xue, Zhe and Li, Yawen and Liang, Meiyu and Wang, Yan and van den Hengel, Anton and Qi, Yuankai},
  title     = {{Medusa}: A Multi-Scale High-order Contrastive Dual-Diffusion Approach for Multi-View Clustering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10295--10304}
}
MambaOut: Do We Really Need Mamba for Vision?: Weihao Yu,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Weihao and Wang, Xinchao},
  title     = {{MambaOut}: Do We Really Need {Mamba} for Vision?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4484--4496}
}
Everything to the Synthetic: Diffusion-driven Test-time Adaptation via Synthetic-Domain Alignment: Jiayi Guo,

Junhao Zhao,

Chaoqun Du,

Yulin Wang,

Chunjiang Ge,

Zanlin Ni,

Shiji Song,

Humphrey Shi,

Gao Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Jiayi and Zhao, Junhao and Du, Chaoqun and Wang, Yulin and Ge, Chunjiang and Ni, Zanlin and Song, Shiji and Shi, Humphrey and Huang, Gao},
  title     = {Everything to the Synthetic: Diffusion-driven Test-time Adaptation via Synthetic-Domain Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30503--30513}
}
Multi-Granularity Class Prototype Topology Distillation for Class-Incremental Source-Free Unsupervised Domain Adaptation: Peihua Deng,

Jiehua Zhang,

Xichun Sheng,

Chenggang Yan,

Yaoqi Sun,

Ying Fu,

Liang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Peihua and Zhang, Jiehua and Sheng, Xichun and Yan, Chenggang and Sun, Yaoqi and Fu, Ying and Li, Liang},
  title     = {Multi-Granularity Class Prototype Topology Distillation for Class-Incremental Source-Free Unsupervised Domain Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30566--30576}
}
DepthCues: Evaluating Monocular Depth Perception in Large Vision Models: Duolikun Danier,

Mehmet Aygün,

Changjian Li,

Hakan Bilen,

Oisin Mac Aodha; [pdf] [supp]
[bibtex]
@InProceedings{Danier_2025_CVPR,
  author    = {Danier, Duolikun and Ayg{\"u}n, Mehmet and Li, Changjian and Bilen, Hakan and Mac Aodha, Oisin},
  title     = {{DepthCues}: Evaluating Monocular Depth Perception in Large Vision Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20049--20059}
}
A Polarization-Aided Transformer for Image Deblurring via Motion Vector Decomposition: Duosheng Chen,

Shihao Zhou,

Jinshan Pan,

Jinglei Shi,

Lishen Qu,

Jufeng Yang; [pdf]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Duosheng and Zhou, Shihao and Pan, Jinshan and Shi, Jinglei and Qu, Lishen and Yang, Jufeng},
  title     = {A Polarization-Aided Transformer for Image Deblurring via Motion Vector Decomposition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28061--28070}
}
SpecTRe-GS: Modeling Highly Specular Surfaces with Reflected Nearby Objects by Tracing Rays in 3D Gaussian Splatting: Jiajun Tang,

Fan Fei,

Zhihao Li,

Xiao Tang,

Shiyong Liu,

Youyu Chen,

Binxiao Huang,

Zhenyu Chen,

Xiaofei Wu,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Jiajun and Fei, Fan and Li, Zhihao and Tang, Xiao and Liu, Shiyong and Chen, Youyu and Huang, Binxiao and Chen, Zhenyu and Wu, Xiaofei and Shi, Boxin},
  title     = {{SpecTRe-GS}: Modeling Highly Specular Surfaces with Reflected Nearby Objects by Tracing Rays in {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16133--16142}
}
Seurat: From Moving Points to Depth: Seokju Cho,

Jiahui Huang,

Seungryong Kim,

Joon-Young Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cho_2025_CVPR,
  author    = {Cho, Seokju and Huang, Jiahui and Kim, Seungryong and Lee, Joon-Young},
  title     = {{Seurat}: From Moving Points to Depth},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7211--7221}
}
AuraFusion360: Augmented Unseen Region Alignment for Reference-based 360° Unbounded Scene Inpainting: Chung-Ho Wu,

Yang-Jung Chen,

Ying-Huan Chen,

Jie-Ying Lee,

Bo-Hsu Ke,

Chun-Wei Tuan Mu,

Yi-Chuan Huang,

Chin-Yang Lin,

Min-Hung Chen,

Yen-Yu Lin,

Yu-Lun Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Chung-Ho and Chen, Yang-Jung and Chen, Ying-Huan and Lee, Jie-Ying and Ke, Bo-Hsu and Mu, Chun-Wei Tuan and Huang, Yi-Chuan and Lin, Chin-Yang and Chen, Min-Hung and Lin, Yen-Yu and Liu, Yu-Lun},
  title     = {{AuraFusion360}: Augmented Unseen Region Alignment for Reference-based 360{$^\circ$} Unbounded Scene Inpainting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16366--16376}
}
Language-Guided Image Tokenization for Generation: Kaiwen Zha,

Lijun Yu,

Alireza Fathi,

David A. Ross,

Cordelia Schmid,

Dina Katabi,

Xiuye Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zha_2025_CVPR,
  author    = {Zha, Kaiwen and Yu, Lijun and Fathi, Alireza and Ross, David A. and Schmid, Cordelia and Katabi, Dina and Gu, Xiuye},
  title     = {Language-Guided Image Tokenization for Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15713--15722}
}
Img-Diff: Contrastive Data Synthesis for Multimodal Large Language Models: Qirui Jiao,

Daoyuan Chen,

Yilun Huang,

Bolin Ding,

Yaliang Li,

Ying Shen; [pdf] [supp]
[bibtex]
@InProceedings{Jiao_2025_CVPR,
  author    = {Jiao, Qirui and Chen, Daoyuan and Huang, Yilun and Ding, Bolin and Li, Yaliang and Shen, Ying},
  title     = {{Img-Diff}: Contrastive Data Synthesis for Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9296--9307}
}
CocoER: Aligning Multi-Level Feature by Competition and Coordination for Emotion Recognition: Xuli Shen,

Hua Cai,

Weilin Shen,

Qing Xu,

Dingding Yu,

Weifeng Ge,

Xiangyang Xue; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Xuli and Cai, Hua and Shen, Weilin and Xu, Qing and Yu, Dingding and Ge, Weifeng and Xue, Xiangyang},
  title     = {{CocoER}: Aligning Multi-Level Feature by Competition and Coordination for Emotion Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29591--29600}
}
Hyperbolic Uncertainty-Aware Few-Shot Incremental Point Cloud Segmentation: Tanuj Sur,

Samrat Mukherjee,

Kaizer Rahaman,

Subhasis Chaudhuri,

Muhammad Haris Khan,

Biplab Banerjee; [pdf] [supp]
[bibtex]
@InProceedings{Sur_2025_CVPR,
  author    = {Sur, Tanuj and Mukherjee, Samrat and Rahaman, Kaizer and Chaudhuri, Subhasis and Khan, Muhammad Haris and Banerjee, Biplab},
  title     = {Hyperbolic Uncertainty-Aware Few-Shot Incremental Point Cloud Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11810--11821}
}
Enhancing Creative Generation on Stable Diffusion-based Models: Jiyeon Han,

Dahee Kwon,

Gayoung Lee,

Junho Kim,

Jaesik Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Jiyeon and Kwon, Dahee and Lee, Gayoung and Kim, Junho and Choi, Jaesik},
  title     = {Enhancing Creative Generation on {Stable Diffusion}-based Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28609--28618}
}
The Devil is in the Prompts: Retrieval-Augmented Prompt Optimization for Text-to-Video Generation: Bingjie Gao,

Xinyu Gao,

Xiaoxue Wu,

Yujie Zhou,

Yu Qiao,

Li Niu,

Xinyuan Chen,

Yaohui Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Bingjie and Gao, Xinyu and Wu, Xiaoxue and Zhou, Yujie and Qiao, Yu and Niu, Li and Chen, Xinyuan and Wang, Yaohui},
  title     = {The Devil is in the Prompts: Retrieval-Augmented Prompt Optimization for Text-to-Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3173--3183}
}
Denoising Functional Maps: Diffusion Models for Shape Correspondence: Aleksei Zhuravlev,

Zorah Lähner,

Vladislav Golyanik; [pdf] [supp]
[bibtex]
@InProceedings{Zhuravlev_2025_CVPR,
  author    = {Zhuravlev, Aleksei and L{\"a}hner, Zorah and Golyanik, Vladislav},
  title     = {Denoising Functional Maps: Diffusion Models for Shape Correspondence},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26899--26909}
}
ProReflow: Progressive Reflow with Decomposed Velocity: Lei Ke,

Haohang Xu,

Xuefei Ning,

Yu Li,

Jiajun Li,

Haoling Li,

Yuxuan Lin,

Dongsheng Jiang,

Yujiu Yang,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2025_CVPR,
  author    = {Ke, Lei and Xu, Haohang and Ning, Xuefei and Li, Yu and Li, Jiajun and Li, Haoling and Lin, Yuxuan and Jiang, Dongsheng and Yang, Yujiu and Zhang, Linfeng},
  title     = {{ProReflow}: Progressive Reflow with Decomposed Velocity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28029--28038}
}
DnLUT: Ultra-Efficient Color Image Denoising via Channel-Aware Lookup Tables: Sidi Yang,

Binxiao Huang,

Yulun Zhang,

Dahai Yu,

Yujiu Yang,

Ngai Wong; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Sidi and Huang, Binxiao and Zhang, Yulun and Yu, Dahai and Yang, Yujiu and Wong, Ngai},
  title     = {{DnLUT}: Ultra-Efficient Color Image Denoising via Channel-Aware Lookup Tables},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7582--7591}
}
Devil is in the Detail: Towards Injecting Fine Details of Image Prompt in Image Generation via Conflict-free Guidance and Stratified Attention: Kyungmin Jo,

Jooyeol Yun,

Jaegul Choo; [pdf] [supp]
[bibtex]
@InProceedings{Jo_2025_CVPR,
  author    = {Jo, Kyungmin and Yun, Jooyeol and Choo, Jaegul},
  title     = {Devil is in the Detail: Towards Injecting Fine Details of Image Prompt in Image Generation via Conflict-free Guidance and Stratified Attention},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23595--23603}
}
D^3-Human: Dynamic Disentangled Digital Human from Monocular Video: Honghu Chen,

Bo Peng,

Yunfan Tao,

Juyong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Honghu and Peng, Bo and Tao, Yunfan and Zhang, Juyong},
  title     = {{D$^3$-Human}: Dynamic Disentangled Digital Human from Monocular Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10836--10846}
}
BiM-VFI: Bidirectional Motion Field-Guided Frame Interpolation for Video with Non-uniform Motions: Wonyong Seo,

Jihyong Oh,

Munchurl Kim; [pdf] [supp]
[bibtex]
@InProceedings{Seo_2025_CVPR,
  author    = {Seo, Wonyong and Oh, Jihyong and Kim, Munchurl},
  title     = {{BiM-VFI}: Bidirectional Motion Field-Guided Frame Interpolation for Video with Non-uniform Motions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7244--7253}
}
Curriculum Coarse-to-Fine Selection for High-IPC Dataset Distillation: Yanda Chen,

Gongwei Chen,

Miao Zhang,

Weili Guan,

Liqiang Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Yanda and Chen, Gongwei and Zhang, Miao and Guan, Weili and Nie, Liqiang},
  title     = {Curriculum Coarse-to-Fine Selection for High-{IPC} Dataset Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20437--20446}
}
BADGR: Bundle Adjustment Diffusion Conditioned by Gradients for Wide-Baseline Floor Plan Reconstruction: Yuguang Li,

Ivaylo Boyadzhiev,

Zixuan Liu,

Linda Shapiro,

Alex Colburn; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yuguang and Boyadzhiev, Ivaylo and Liu, Zixuan and Shapiro, Linda and Colburn, Alex},
  title     = {{BADGR}: Bundle Adjustment Diffusion Conditioned by Gradients for Wide-Baseline Floor Plan Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16785--16795}
}
Three Cars Approaching within 100m! Enhancing Distant Geometry by Tri-Axis Voxel Scanning for Camera-based Semantic Scene Completion: Jongseong Bae,

Junwoo Ha,

Ha Young Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bae_2025_CVPR,
  author    = {Bae, Jongseong and Ha, Junwoo and Kim, Ha Young},
  title     = {Three Cars Approaching within 100m! Enhancing Distant Geometry by Tri-Axis Voxel Scanning for Camera-based Semantic Scene Completion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11939--11948}
}
MetaShadow: Object-Centered Shadow Detection, Removal, and Synthesis: Tianyu Wang,

Jianming Zhang,

Haitian Zheng,

Zhihong Ding,

Scott Cohen,

Zhe Lin,

Wei Xiong,

Chi-Wing Fu,

Luis Figueroa,

Soo Ye Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Tianyu and Zhang, Jianming and Zheng, Haitian and Ding, Zhihong and Cohen, Scott and Lin, Zhe and Xiong, Wei and Fu, Chi-Wing and Figueroa, Luis and Kim, Soo Ye},
  title     = {{MetaShadow}: Object-Centered Shadow Detection, Removal, and Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28252--28262}
}
TANGO: Training-free Embodied AI Agents for Open-world Tasks: Filippo Ziliotto,

Tommaso Campari,

Luciano Serafini,

Lamberto Ballan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ziliotto_2025_CVPR,
  author    = {Ziliotto, Filippo and Campari, Tommaso and Serafini, Luciano and Ballan, Lamberto},
  title     = {{TANGO}: Training-free Embodied {AI} Agents for Open-world Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24603--24613}
}
SATA: Spatial Autocorrelation Token Analysis for Enhancing the Robustness of Vision Transformers: Nick Nikzad,

Yi Liao,

Yongsheng Gao,

Jun Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nikzad_2025_CVPR,
  author    = {Nikzad, Nick and Liao, Yi and Gao, Yongsheng and Zhou, Jun},
  title     = {{SATA}: Spatial Autocorrelation Token Analysis for Enhancing the Robustness of Vision Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9730--9739}
}
DViN: Dynamic Visual Routing Network for Weakly Supervised Referring Expression Comprehension: Xiaofu Chen,

Yaxin Luo,

Gen Luo,

Jiayi Ji,

Henghui Ding,

Yiyi Zhou; [pdf]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Xiaofu and Luo, Yaxin and Luo, Gen and Ji, Jiayi and Ding, Henghui and Zhou, Yiyi},
  title     = {{DViN}: Dynamic Visual Routing Network for Weakly Supervised Referring Expression Comprehension},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14347--14357}
}
Nested Diffusion Models Using Hierarchical Latent Priors: Xiao Zhang,

Ruoxi Jiang,

Rebecca Willett,

Michael Maire; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xiao and Jiang, Ruoxi and Willett, Rebecca and Maire, Michael},
  title     = {Nested Diffusion Models Using Hierarchical Latent Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2502--2512}
}
A Theory of Learning Unified Model via Knowledge Integration from Label Space Varying Domains: Dexuan Zhang,

Thomas Westfechtel,

Tatsuya Harada; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Dexuan and Westfechtel, Thomas and Harada, Tatsuya},
  title     = {A Theory of Learning Unified Model via Knowledge Integration from Label Space Varying Domains},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10142--10152}
}
HiLoTs: High-Low Temporal Sensitive Representation Learning for Semi-Supervised LiDAR Segmentation in Autonomous Driving: R.D. Lin,

Pengcheng Weng,

Yinqiao Wang,

Han Ding,

Jinsong Han,

Fei Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, R. D. and Weng, Pengcheng and Wang, Yinqiao and Ding, Han and Han, Jinsong and Wang, Fei},
  title     = {{HiLoTs}: High-Low Temporal Sensitive Representation Learning for Semi-Supervised {LiDAR} Segmentation in Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1429--1438}
}
Spiking Transformer with Spatial-Temporal Attention: Donghyun Lee,

Yuhang Li,

Youngeun Kim,

Shiting Xiao,

Priyadarshini Panda; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Donghyun and Li, Yuhang and Kim, Youngeun and Xiao, Shiting and Panda, Priyadarshini},
  title     = {Spiking Transformer with Spatial-Temporal Attention},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13948--13958}
}
Perceptual Video Compression with Neural Wrapping: Muhammad Umar Karim Khan,

Aaron Chadha,

Mohammad Ashraful Anam,

Yiannis Andreopoulos; [pdf] [supp]
[bibtex]
@InProceedings{Khan_2025_CVPR,
  author    = {Khan, Muhammad Umar Karim and Chadha, Aaron and Anam, Mohammad Ashraful and Andreopoulos, Yiannis},
  title     = {Perceptual Video Compression with Neural Wrapping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17743--17754}
}
ViKIENet: Towards Efficient 3D Object Detection with Virtual Key Instance Enhanced Network: Zhuochen Yu,

Bijie Qiu,

Andy W. H. Khong; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Zhuochen and Qiu, Bijie and Khong, Andy W. H.},
  title     = {{ViKIENet}: Towards Efficient {3D} Object Detection with Virtual Key Instance Enhanced Network},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11844--11853}
}
DKDM: Data-Free Knowledge Distillation for Diffusion Models with Any Architecture: Qianlong Xiang,

Miao Zhang,

Yuzhang Shang,

Jianlong Wu,

Yan Yan,

Liqiang Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2025_CVPR,
  author    = {Xiang, Qianlong and Zhang, Miao and Shang, Yuzhang and Wu, Jianlong and Yan, Yan and Nie, Liqiang},
  title     = {{DKDM}: Data-Free Knowledge Distillation for Diffusion Models with Any Architecture},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2955--2965}
}
SymDPO: Boosting In-Context Learning of Large Multimodal Models with Symbol Demonstration Direct Preference Optimization: Hongrui Jia,

Chaoya Jiang,

Haiyang Xu,

Wei Ye,

Mengfan Dong,

Ming Yan,

Ji Zhang,

Fei Huang,

Shikun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2025_CVPR,
  author    = {Jia, Hongrui and Jiang, Chaoya and Xu, Haiyang and Ye, Wei and Dong, Mengfan and Yan, Ming and Zhang, Ji and Huang, Fei and Zhang, Shikun},
  title     = {{SymDPO}: Boosting In-Context Learning of Large Multimodal Models with Symbol Demonstration Direct Preference Optimization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9361--9371}
}
Stealthy Backdoor Attack in Self-Supervised Learning Vision Encoders for Large Vision Language Models: Zhaoyi Liu,

Huan Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhaoyi and Zhang, Huan},
  title     = {Stealthy Backdoor Attack in Self-Supervised Learning Vision Encoders for Large Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25060--25070}
}
Data-free Universal Adversarial Perturbation with Pseudo-semantic Prior: Chanhui Lee,

Yeonghwan Song,

Jeany Son; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Chanhui and Song, Yeonghwan and Son, Jeany},
  title     = {Data-free Universal Adversarial Perturbation with Pseudo-semantic Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13907--13916}
}
Debiasing Multimodal Large Language Models via Noise-Aware Preference Optimization: Zefeng Zhang,

Hengzhu Tang,

Jiawei Sheng,

Zhenyu Zhang,

Yiming Ren,

Zhenyang Li,

Dawei Yin,

Duohe Ma,

Tingwen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zefeng and Tang, Hengzhu and Sheng, Jiawei and Zhang, Zhenyu and Ren, Yiming and Li, Zhenyang and Yin, Dawei and Ma, Duohe and Liu, Tingwen},
  title     = {Debiasing Multimodal Large Language Models via Noise-Aware Preference Optimization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9423--9433}
}
SAM2-LOVE: Segment Anything Model 2 in Language-aided Audio-Visual Scenes: Yuji Wang,

Haoran Xu,

Yong Liu,

Jiaze Li,

Yansong Tang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuji and Xu, Haoran and Liu, Yong and Li, Jiaze and Tang, Yansong},
  title     = {{SAM2-LOVE}: Segment Anything Model 2 in Language-aided Audio-Visual Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28932--28941}
}
GIVEPose: Gradual Intra-class Variation Elimination for RGB-based Category-Level Object Pose Estimation: Ziqin Huang,

Gu Wang,

Chenyangguang Zhang,

Ruida Zhang,

Xiu Li,

Xiangyang Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Ziqin and Wang, Gu and Zhang, Chenyangguang and Zhang, Ruida and Li, Xiu and Ji, Xiangyang},
  title     = {{GIVEPose}: Gradual Intra-class Variation Elimination for {RGB}-based Category-Level Object Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22055--22066}
}
FRAME: Floor-aligned Representation for Avatar Motion from Egocentric Video: Andrea Boscolo Camiletto,

Jian Wang,

Eduardo Alvarado,

Rishabh Dabral,

Thabo Beeler,

Marc Habermann,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Camiletto_2025_CVPR,
  author    = {Camiletto, Andrea Boscolo and Wang, Jian and Alvarado, Eduardo and Dabral, Rishabh and Beeler, Thabo and Habermann, Marc and Theobalt, Christian},
  title     = {{FRAME}: Floor-aligned Representation for Avatar Motion from Egocentric Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17497--17507}
}
Sketch Down the FLOPs: Towards Efficient Networks for Human Sketch: Aneeshan Sain,

Subhajit Maity,

Pinaki Nath Chowdhury,

Shubhadeep Koley,

Ayan Kumar Bhunia,

Yi-Zhe Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sain_2025_CVPR,
  author    = {Sain, Aneeshan and Maity, Subhajit and Chowdhury, Pinaki Nath and Koley, Shubhadeep and Bhunia, Ayan Kumar and Song, Yi-Zhe},
  title     = {Sketch Down the {FLOPs}: Towards Efficient Networks for Human Sketch},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28383--28393}
}
Generalized Zero-Shot Classification via Semantics-Free Inter-Class Feature Generation: Libiao Chen,

Dong Nie,

Junjun Pan,

Jing Yan,

Zhenyu Tang; [pdf]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Libiao and Nie, Dong and Pan, Junjun and Yan, Jing and Tang, Zhenyu},
  title     = {Generalized Zero-Shot Classification via Semantics-Free Inter-Class Feature Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20286--20295}
}
Feat2GS: Probing Visual Foundation Models with Gaussian Splatting: Yue Chen,

Xingyu Chen,

Anpei Chen,

Gerard Pons-Moll,

Yuliang Xiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Yue and Chen, Xingyu and Chen, Anpei and Pons-Moll, Gerard and Xiu, Yuliang},
    title     = {{Feat2GS}: Probing Visual Foundation Models with {Gaussian} Splatting},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {6348--6361}
}
Multi-Modal Aerial-Ground Cross-View Place Recognition with Neural ODEs: Sijie Wang,

Rui She,

Qiyu Kang,

Siqi Li,

Disheng Li,

Tianyu Geng,

Shangshu Yu,

Wee Peng Tay; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Sijie and She, Rui and Kang, Qiyu and Li, Siqi and Li, Disheng and Geng, Tianyu and Yu, Shangshu and Tay, Wee Peng},
    title     = {Multi-Modal Aerial-Ground Cross-View Place Recognition with Neural {ODEs}},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {11717--11728}
}
Rethinking Decoder Design: Improving Biomarker Segmentation Using Depth-to-Space Restoration and Residual Linear Attention: Saad Wazir,

Daeyoung Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wazir_2025_CVPR,
    author    = {Wazir, Saad and Kim, Daeyoung},
    title     = {Rethinking Decoder Design: Improving Biomarker Segmentation Using Depth-to-Space Restoration and Residual Linear Attention},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {30861--30871}
}
MaDCoW: Marginal Distortion Correction for Wide-Angle Photography with Arbitrary Objects: Kevin Zhang,

Jia-Bin Huang,

Jose Echevarria,

Stephen DiVerdi,

Aaron Hertzmann; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Kevin and Huang, Jia-Bin and Echevarria, Jose and DiVerdi, Stephen and Hertzmann, Aaron},
    title     = {{MaDCoW}: Marginal Distortion Correction for Wide-Angle Photography with Arbitrary Objects},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {10923--10932}
}
SynTab-LLaVA: Enhancing Multimodal Table Understanding with Decoupled Synthesis: Bangbang Zhou,

Zuan Gao,

Zixiao Wang,

Boqiang Zhang,

Yuxin Wang,

Zhineng Chen,

Hongtao Xie; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
    author    = {Zhou, Bangbang and Gao, Zuan and Wang, Zixiao and Zhang, Boqiang and Wang, Yuxin and Chen, Zhineng and Xie, Hongtao},
    title     = {{SynTab-LLaVA}: Enhancing Multimodal Table Understanding with Decoupled Synthesis},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {24796--24806}
}
Edit Away and My Face Will not Stay: Personal Biometric Defense against Malicious Generative Editing: Hanhui Wang,

Yihua Zhang,

Ruizheng Bai,

Yue Zhao,

Sijia Liu,

Zhengzhong Tu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Hanhui and Zhang, Yihua and Bai, Ruizheng and Zhao, Yue and Liu, Sijia and Tu, Zhengzhong},
    title     = {Edit Away and My Face Will not Stay: Personal Biometric Defense against Malicious Generative Editing},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {23806--23816}
}
Any6D: Model-free 6D Pose Estimation of Novel Objects: Taeyeop Lee,

Bowen Wen,

Minjun Kang,

Gyuree Kang,

In So Kweon,

Kuk-Jin Yoon; [pdf] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
    author    = {Lee, Taeyeop and Wen, Bowen and Kang, Minjun and Kang, Gyuree and Kweon, In So and Yoon, Kuk-Jin},
    title     = {{Any6D}: Model-free {6D} Pose Estimation of Novel Objects},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {11633--11643}
}
Improving Accuracy and Calibration via Differentiated Deep Mutual Learning: Han Liu,

Peng Cui,

Bingning Wang,

Weipeng Chen,

Yupeng Zhang,

Jun Zhu,

Xiaolin Hu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Han and Cui, Peng and Wang, Bingning and Chen, Weipeng and Zhang, Yupeng and Zhu, Jun and Hu, Xiaolin},
    title     = {Improving Accuracy and Calibration via Differentiated Deep Mutual Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {25812--25821}
}
DrVideo: Document Retrieval Based Long Video Understanding: Ziyu Ma,

Chenhui Gou,

Hengcan Shi,

Bin Sun,

Shutao Li,

Hamid Rezatofighi,

Jianfei Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
    author    = {Ma, Ziyu and Gou, Chenhui and Shi, Hengcan and Sun, Bin and Li, Shutao and Rezatofighi, Hamid and Cai, Jianfei},
    title     = {{DrVideo}: Document Retrieval Based Long Video Understanding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {18936--18946}
}
Infighting in the Dark: Multi-Label Backdoor Attack in Federated Learning: Ye Li,

Yanchao Zhao,

Chengcheng Zhu,

Jiale Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Ye and Zhao, Yanchao and Zhu, Chengcheng and Zhang, Jiale},
    title     = {Infighting in the Dark: Multi-Label Backdoor Attack in Federated Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {25770--25779}
}
Buffer Anytime: Zero-Shot Video Depth and Normal from Image Priors: Zhengfei Kuang,

Tianyuan Zhang,

Kai Zhang,

Hao Tan,

Sai Bi,

Yiwei Hu,

Zexiang Xu,

Milos Hasan,

Gordon Wetzstein,

Fujun Luan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kuang_2025_CVPR,
    author    = {Kuang, Zhengfei and Zhang, Tianyuan and Zhang, Kai and Tan, Hao and Bi, Sai and Hu, Yiwei and Xu, Zexiang and Hasan, Milos and Wetzstein, Gordon and Luan, Fujun},
    title     = {Buffer Anytime: Zero-Shot Video Depth and Normal from Image Priors},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {17660--17670}
}
PSHuman: Photorealistic Single-image 3D Human Reconstruction using Cross-Scale Multiview Diffusion and Explicit Remeshing: Peng Li,

Wangguandong Zheng,

Yuan Liu,

Tao Yu,

Yangguang Li,

Xingqun Qi,

Xiaowei Chi,

Siyu Xia,

Yan-Pei Cao,

Wei Xue,

Wenhan Luo,

Yike Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Peng and Zheng, Wangguandong and Liu, Yuan and Yu, Tao and Li, Yangguang and Qi, Xingqun and Chi, Xiaowei and Xia, Siyu and Cao, Yan-Pei and Xue, Wei and Luo, Wenhan and Guo, Yike},
    title     = {{PSHuman}: Photorealistic Single-image {3D} Human Reconstruction using Cross-Scale Multiview Diffusion and Explicit Remeshing},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {16008--16018}
}
LSNet: See Large, Focus Small: Ao Wang,

Hui Chen,

Zijia Lin,

Jungong Han,

Guiguang Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Ao and Chen, Hui and Lin, Zijia and Han, Jungong and Ding, Guiguang},
    title     = {{LSNet}: See Large, Focus Small},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {9718--9729}
}
DynamicScaler: Seamless and Scalable Video Generation for Panoramic Scenes: Jinxiu Liu,

Shaoheng Lin,

Yinxiao Li,

Ming-Hsuan Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Jinxiu and Lin, Shaoheng and Li, Yinxiao and Yang, Ming-Hsuan},
    title     = {{DynamicScaler}: Seamless and Scalable Video Generation for Panoramic Scenes},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {6144--6153}
}
Tartan IMU: A Light Foundation Model for Inertial Positioning in Robotics: Shibo Zhao,

Sifan Zhou,

Raphael Blanchard,

Yuheng Qiu,

Wenshan Wang,

Sebastian Scherer; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
    author    = {Zhao, Shibo and Zhou, Sifan and Blanchard, Raphael and Qiu, Yuheng and Wang, Wenshan and Scherer, Sebastian},
    title     = {Tartan {IMU}: A Light Foundation Model for Inertial Positioning in Robotics},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {22520--22529}
}
Event Ellipsometer: Event-based Mueller-Matrix Video Imaging: Ryota Maeda,

Yunseong Moon,

Seung-Hwan Baek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Maeda_2025_CVPR,
    author    = {Maeda, Ryota and Moon, Yunseong and Baek, Seung-Hwan},
    title     = {Event Ellipsometer: Event-based {Mueller}-Matrix Video Imaging},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {21804--21813}
}
DocLayLLM: An Efficient Multi-modal Extension of Large Language Models for Text-rich Document Understanding: Wenhui Liao,

Jiapeng Wang,

Hongliang Li,

Chengyu Wang,

Jun Huang,

Lianwen Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_CVPR,
    author    = {Liao, Wenhui and Wang, Jiapeng and Li, Hongliang and Wang, Chengyu and Huang, Jun and Jin, Lianwen},
    title     = {{DocLayLLM}: An Efficient Multi-modal Extension of Large Language Models for Text-rich Document Understanding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {4038--4049}
}
EDEN: Enhanced Diffusion for High-quality Large-motion Video Frame Interpolation: Zihao Zhang,

Haoran Chen,

Haoyu Zhao,

Guansong Lu,

Yanwei Fu,

Hang Xu,

Zuxuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Zihao and Chen, Haoran and Zhao, Haoyu and Lu, Guansong and Fu, Yanwei and Xu, Hang and Wu, Zuxuan},
    title     = {{EDEN}: Enhanced Diffusion for High-quality Large-motion Video Frame Interpolation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {2105--2115}
}
Handling Spatial-Temporal Data Heterogeneity for Federated Continual Learning via Tail Anchor: Hao Yu,

Xin Yang,

Le Zhang,

Hanlin Gu,

Tianrui Li,

Lixin Fan,

Qiang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
    author    = {Yu, Hao and Yang, Xin and Zhang, Le and Gu, Hanlin and Li, Tianrui and Fan, Lixin and Yang, Qiang},
    title     = {Handling Spatial-Temporal Data Heterogeneity for Federated Continual Learning via Tail Anchor},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {4874--4883}
}
DeSiRe-GS: 4D Street Gaussians for Static-Dynamic Decomposition and Surface Reconstruction for Urban Driving Scenes: Chensheng Peng,

Chengwei Zhang,

Yixiao Wang,

Chenfeng Xu,

Yichen Xie,

Wenzhao Zheng,

Kurt Keutzer,

Masayoshi Tomizuka,

Wei Zhan; [pdf] [supp]
[bibtex]
@InProceedings{Peng_2025_CVPR,
    author    = {Peng, Chensheng and Zhang, Chengwei and Wang, Yixiao and Xu, Chenfeng and Xie, Yichen and Zheng, Wenzhao and Keutzer, Kurt and Tomizuka, Masayoshi and Zhan, Wei},
    title     = {{DeSiRe-GS}: {4D} Street {Gaussians} for Static-Dynamic Decomposition and Surface Reconstruction for Urban Driving Scenes},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {6782--6791}
}
End-to-End HOI Reconstruction Transformer with Graph-based Encoding: Zhenrong Wang,

Qi Zheng,

Sihan Ma,

Maosheng Ye,

Yibing Zhan,

Dongjiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Zhenrong and Zheng, Qi and Ma, Sihan and Ye, Maosheng and Zhan, Yibing and Li, Dongjiang},
    title     = {End-to-End {HOI} Reconstruction Transformer with Graph-based Encoding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {27706--27715}
}
REWIND: Real-Time Egocentric Whole-Body Motion Diffusion with Exemplar-Based Identity Conditioning: Jihyun Lee,

Weipeng Xu,

Alexander Richard,

Shih-En Wei,

Shunsuke Saito,

Shaojie Bai,

Te-Li Wang,

Minhyuk Sung,

Tae-Kyun Kim,

Jason Saragih; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
    author    = {Lee, Jihyun and Xu, Weipeng and Richard, Alexander and Wei, Shih-En and Saito, Shunsuke and Bai, Shaojie and Wang, Te-Li and Sung, Minhyuk and Kim, Tae-Kyun and Saragih, Jason},
    title     = {{REWIND}: Real-Time Egocentric Whole-Body Motion Diffusion with Exemplar-Based Identity Conditioning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {7095--7104}
}
Hiding Images in Diffusion Models by Editing Learned Score Functions: Haoyu Chen,

Yunqiao Yang,

Nan Zhong,

Kede Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Haoyu and Yang, Yunqiao and Zhong, Nan and Ma, Kede},
    title     = {Hiding Images in Diffusion Models by Editing Learned Score Functions},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {18663--18673}
}
Disco4D: Disentangled 4D Human Generation and Animation from a Single Image: Hui En Pang,

Shuai Liu,

Zhongang Cai,

Lei Yang,

Tianwei Zhang,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pang_2025_CVPR,
    author    = {Pang, Hui En and Liu, Shuai and Cai, Zhongang and Yang, Lei and Zhang, Tianwei and Liu, Ziwei},
    title     = {{Disco4D}: Disentangled {4D} Human Generation and Animation from a Single Image},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {26331--26344}
}
DoraCycle: Domain-Oriented Adaptation of Unified Generative Model in Multimodal Cycles: Rui Zhao,

Weijia Mao,

Mike Zheng Shou; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
    author    = {Zhao, Rui and Mao, Weijia and Shou, Mike Zheng},
    title     = {{DoraCycle}: Domain-Oriented Adaptation of Unified Generative Model in Multimodal Cycles},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {2835--2846}
}
WeatherGen: A Unified Diverse Weather Generator for LiDAR Point Clouds via Spider Mamba Diffusion: Yang Wu,

Yun Zhu,

Kaihua Zhang,

Jianjun Qian,

Jin Xie,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
    author    = {Wu, Yang and Zhu, Yun and Zhang, Kaihua and Qian, Jianjun and Xie, Jin and Yang, Jian},
    title     = {{WeatherGen}: A Unified Diverse Weather Generator for {LiDAR} Point Clouds via Spider {Mamba} Diffusion},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {17019--17028}
}
MUST: The First Dataset and Unified Framework for Multispectral UAV Single Object Tracking: Haolin Qin,

Tingfa Xu,

Tianhao Li,

Zhenxiang Chen,

Tao Feng,

Jianan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2025_CVPR,
    author    = {Qin, Haolin and Xu, Tingfa and Li, Tianhao and Chen, Zhenxiang and Feng, Tao and Li, Jianan},
    title     = {{MUST}: The First Dataset and Unified Framework for Multispectral {UAV} Single Object Tracking},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {16882--16891}
}
IDOL: Instant Photorealistic 3D Human Creation from a Single Image: Yiyu Zhuang,

Jiaxi Lv,

Hao Wen,

Qing Shuai,

Ailing Zeng,

Hao Zhu,

Shifeng Chen,

Yujiu Yang,

Xun Cao,

Wei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuang_2025_CVPR,
    author    = {Zhuang, Yiyu and Lv, Jiaxi and Wen, Hao and Shuai, Qing and Zeng, Ailing and Zhu, Hao and Chen, Shifeng and Yang, Yujiu and Cao, Xun and Liu, Wei},
    title     = {{IDOL}: Instant Photorealistic {3D} Human Creation from a Single Image},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {26308--26319}
}
Tightening Robustness Verification of MaxPool-based Neural Networks via Minimizing the Over-Approximation Zone: Yuan Xiao,

Yuchen Chen,

Shiqing Ma,

Chunrong Fang,

Tongtong Bai,

Mingzheng Gu,

Yuxin Cheng,

Yanwei Chen,

Zhenyu Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
    author    = {Xiao, Yuan and Chen, Yuchen and Ma, Shiqing and Fang, Chunrong and Bai, Tongtong and Gu, Mingzheng and Cheng, Yuxin and Chen, Yanwei and Chen, Zhenyu},
    title     = {Tightening Robustness Verification of {MaxPool}-based Neural Networks via Minimizing the Over-Approximation Zone},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {20695--20705}
}
SketchVideo: Sketch-based Video Generation and Editing: Feng-Lin Liu,

Hongbo Fu,

Xintao Wang,

Weicai Ye,

Pengfei Wan,

Di Zhang,

Lin Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Feng-Lin and Fu, Hongbo and Wang, Xintao and Ye, Weicai and Wan, Pengfei and Zhang, Di and Gao, Lin},
    title     = {{SketchVideo}: Sketch-based Video Generation and Editing},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {23379--23390}
}
PhysicsGen: Can Generative Models Learn from Images to Predict Complex Physical Relations?: Martin Spitznagel,

Jan Vaillant,

Janis Keuper; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Spitznagel_2025_CVPR,
    author    = {Spitznagel, Martin and Vaillant, Jan and Keuper, Janis},
    title     = {{PhysicsGen}: Can Generative Models Learn from Images to Predict Complex Physical Relations?},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {11125--11134}
}
Taste More, Taste Better: Diverse Data and Strong Model Boost Semi-Supervised Crowd Counting: Maochen Yang,

Zekun Li,

Jian Zhang,

Lei Qi,

Yinghuan Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
    author    = {Yang, Maochen and Li, Zekun and Zhang, Jian and Qi, Lei and Shi, Yinghuan},
    title     = {Taste More, Taste Better: Diverse Data and Strong Model Boost Semi-Supervised Crowd Counting},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {24440--24451}
}
Gaussian Splashing: Unified Particles for Versatile Motion Synthesis and Rendering: Yutao Feng,

Xiang Feng,

Yintong Shang,

Ying Jiang,

Chang Yu,

Zeshun Zong,

Tianjia Shao,

Hongzhi Wu,

Kun Zhou,

Chenfanfu Jiang,

Yin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_CVPR,
    author    = {Feng, Yutao and Feng, Xiang and Shang, Yintong and Jiang, Ying and Yu, Chang and Zong, Zeshun and Shao, Tianjia and Wu, Hongzhi and Zhou, Kun and Jiang, Chenfanfu and Yang, Yin},
    title     = {{Gaussian} Splashing: Unified Particles for Versatile Motion Synthesis and Rendering},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {518--529}
}
Improve Representation for Imbalanced Regression through Geometric Constraints: Zijian Dong,

Yilei Wu,

Chongyao Chen,

Yingtian Zou,

Yichi Zhang,

Juan Helen Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_CVPR,
    author    = {Dong, Zijian and Wu, Yilei and Chen, Chongyao and Zou, Yingtian and Zhang, Yichi and Zhou, Juan Helen},
    title     = {Improve Representation for Imbalanced Regression through Geometric Constraints},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {5082--5091}
}
AnyDressing: Customizable Multi-Garment Virtual Dressing via Latent Diffusion Models: Xinghui Li,

Qichao Sun,

Pengze Zhang,

Fulong Ye,

Zhichao Liao,

Wanquan Feng,

Songtao Zhao,

Qian He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Xinghui and Sun, Qichao and Zhang, Pengze and Ye, Fulong and Liao, Zhichao and Feng, Wanquan and Zhao, Songtao and He, Qian},
    title     = {{AnyDressing}: Customizable Multi-Garment Virtual Dressing via Latent Diffusion Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {23723--23733}
}
Spectral Informed Mamba for Robust Point Cloud Processing: Ali Bahri,

Moslem Yazdanpanah,

Mehrdad Noori,

Sahar Dastani,

Milad Cheraghalikhani,

Gustavo Adolfo Vargas Hakim,

David Osowiechi,

Farzad Beizaee,

Ismail Ben Ayed,

Christian Desrosiers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bahri_2025_CVPR,
    author    = {Bahri, Ali and Yazdanpanah, Moslem and Noori, Mehrdad and Dastani, Sahar and Cheraghalikhani, Milad and Hakim, Gustavo Adolfo Vargas and Osowiechi, David and Beizaee, Farzad and Ben Ayed, Ismail and Desrosiers, Christian},
    title     = {Spectral Informed {Mamba} for Robust Point Cloud Processing},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {11799--11809}
}
Latent Space Imaging: Matheus Souza,

Yidan Zheng,

Kaizhang Kang,

Yogeshwar Nath Mishra,

Qiang Fu,

Wolfgang Heidrich; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Souza_2025_CVPR,
    author    = {Souza, Matheus and Zheng, Yidan and Kang, Kaizhang and Mishra, Yogeshwar Nath and Fu, Qiang and Heidrich, Wolfgang},
    title     = {Latent Space Imaging},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {28295--28305}
}
Balanced Direction from Multifarious Choices: Arithmetic Meta-Learning for Domain Generalization: Xiran Wang,

Jian Zhang,

Lei Qi,

Yinghuan Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Xiran and Zhang, Jian and Qi, Lei and Shi, Yinghuan},
    title     = {Balanced Direction from Multifarious Choices: Arithmetic Meta-Learning for Domain Generalization},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {30577--30587}
}
Anatomical Consistency and Adaptive Prior-informed Transformation for Multi-contrast MR Image Synthesis via Diffusion Model: Yejee Shin,

Yeeun Lee,

Hanbyol Jang,

Geonhui Son,

Hyeongyu Kim,

Dosik Hwang; [pdf] [supp]
[bibtex]
@InProceedings{Shin_2025_CVPR,
    author    = {Shin, Yejee and Lee, Yeeun and Jang, Hanbyol and Son, Geonhui and Kim, Hyeongyu and Hwang, Dosik},
    title     = {Anatomical Consistency and Adaptive Prior-informed Transformation for Multi-contrast {MR} Image Synthesis via Diffusion Model},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {30918--30927}
}
BlobGEN-Vid: Compositional Text-to-Video Generation with Blob Video Representations: Weixi Feng,

Chao Liu,

Sifei Liu,

William Yang Wang,

Arash Vahdat,

Weili Nie; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2025_CVPR,
    author    = {Feng, Weixi and Liu, Chao and Liu, Sifei and Wang, William Yang and Vahdat, Arash and Nie, Weili},
    title     = {{BlobGEN-Vid}: Compositional Text-to-Video Generation with Blob Video Representations},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {12989--12998}
}
D2SP: Dynamic Dual-Stage Purification Framework for Dual Noise Mitigation in Vision-based Affective Recognition.: Haoran Wang,

Xinji Mai,

Zeng Tao,

Xuan Tong,

Junxiong Lin,

Yan Wang,

Jiawen Yu,

Shaoqi Yan,

Ziheng Zhou,

Wenqiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Haoran and Mai, Xinji and Tao, Zeng and Tong, Xuan and Lin, Junxiong and Wang, Yan and Yu, Jiawen and Yan, Shaoqi and Zhou, Ziheng and Zhang, Wenqiang},
    title     = {{D2SP}: Dynamic Dual-Stage Purification Framework for Dual Noise Mitigation in Vision-based Affective Recognition},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {19218--19229}
}
PartRM: Modeling Part-Level Dynamics with Large Cross-State Reconstruction Model: Mingju Gao,

Yike Pan,

Huan-ang Gao,

Zongzheng Zhang,

Wenyi Li,

Hao Dong,

Hao Tang,

Li Yi,

Hao Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
    author    = {Gao, Mingju and Pan, Yike and Gao, Huan-ang and Zhang, Zongzheng and Li, Wenyi and Dong, Hao and Tang, Hao and Yi, Li and Zhao, Hao},
    title     = {{PartRM}: Modeling Part-Level Dynamics with Large Cross-State Reconstruction Model},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {7004--7014}
}
LaVin-DiT: Large Vision Diffusion Transformer: Zhaoqing Wang,

Xiaobo Xia,

Runnan Chen,

Dongdong Yu,

Changhu Wang,

Mingming Gong,

Tongliang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Zhaoqing and Xia, Xiaobo and Chen, Runnan and Yu, Dongdong and Wang, Changhu and Gong, Mingming and Liu, Tongliang},
    title     = {{LaVin-DiT}: Large Vision Diffusion Transformer},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {20060--20070}
}
DiffFNO: Diffusion Fourier Neural Operator: Xiaoyi Liu,

Hao Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Xiaoyi and Tang, Hao},
    title     = {{DiffFNO}: Diffusion {Fourier} Neural Operator},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {150--160}
}
CAP-Net: A Unified Network for 6D Pose and Size Estimation of Categorical Articulated Parts from a Single RGB-D Image: Jingshun Huang,

Haitao Lin,

Tianyu Wang,

Yanwei Fu,

Xiangyang Xue,

Yi Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
    author    = {Huang, Jingshun and Lin, Haitao and Wang, Tianyu and Fu, Yanwei and Xue, Xiangyang and Zhu, Yi},
    title     = {{CAP-Net}: A Unified Network for {6D} Pose and Size Estimation of Categorical Articulated Parts from a Single {RGB-D} Image},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {11654--11664}
}
SeCap: Self-Calibrating and Adaptive Prompts for Cross-view Person Re-Identification in Aerial-Ground Networks: Shining Wang,

Yunlong Wang,

Ruiqi Wu,

Bingliang Jiao,

Wenxuan Wang,

Peng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Shining and Wang, Yunlong and Wu, Ruiqi and Jiao, Bingliang and Wang, Wenxuan and Wang, Peng},
    title     = {{SeCap}: Self-Calibrating and Adaptive Prompts for Cross-view Person Re-Identification in Aerial-Ground Networks},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {22119--22128}
}
Zero-Shot Styled Text Image Generation, but Make It Autoregressive: Vittorio Pippi,

Fabio Quattrini,

Silvia Cascianelli,

Alessio Tonioni,

Rita Cucchiara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pippi_2025_CVPR,
    author    = {Pippi, Vittorio and Quattrini, Fabio and Cascianelli, Silvia and Tonioni, Alessio and Cucchiara, Rita},
    title     = {Zero-Shot Styled Text Image Generation, but Make It Autoregressive},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {7910--7919}
}
Don't Shake the Wheel: Momentum-Aware Planning in End-to-End Autonomous Driving: Ziying Song,

Caiyan Jia,

Lin Liu,

Hongyu Pan,

Yongchang Zhang,

Junming Wang,

Xingyu Zhang,

Shaoqing Xu,

Lei Yang,

Yadan Luo; [pdf] [supp]
[bibtex]
@InProceedings{Song_2025_CVPR,
    author    = {Song, Ziying and Jia, Caiyan and Liu, Lin and Pan, Hongyu and Zhang, Yongchang and Wang, Junming and Zhang, Xingyu and Xu, Shaoqing and Yang, Lei and Luo, Yadan},
    title     = {Don't Shake the Wheel: Momentum-Aware Planning in End-to-End Autonomous Driving},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {22432--22441}
}
Leveraging Perturbation Robustness to Enhance Out-of-Distribution Detection: Wenxi Chen,

Raymond A. Yeh,

Shaoshuai Mou,

Yan Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Wenxi and Yeh, Raymond A. and Mou, Shaoshuai and Gu, Yan},
    title     = {Leveraging Perturbation Robustness to Enhance Out-of-Distribution Detection},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {4724--4733}
}
Neural Motion Simulator Pushing the Limit of World Models in Reinforcement Learning: Chenjie Hao,

Weyl Lu,

Yifan Xu,

Yubei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hao_2025_CVPR,
    author    = {Hao, Chenjie and Lu, Weyl and Xu, Yifan and Chen, Yubei},
    title     = {Neural Motion Simulator Pushing the Limit of World Models in Reinforcement Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {27608--27617}
}
Aesthetic Post-Training Diffusion Models from Generic Preferences with Step-by-step Preference Optimization: Zhanhao Liang,

Yuhui Yuan,

Shuyang Gu,

Bohan Chen,

Tiankai Hang,

Mingxi Cheng,

Ji Li,

Liang Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
    author    = {Liang, Zhanhao and Yuan, Yuhui and Gu, Shuyang and Chen, Bohan and Hang, Tiankai and Cheng, Mingxi and Li, Ji and Zheng, Liang},
    title     = {Aesthetic Post-Training Diffusion Models from Generic Preferences with Step-by-step Preference Optimization},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {13199--13208}
}
Adversarial Diffusion Compression for Real-World Image Super-Resolution: Bin Chen,

Gehui Li,

Rongyuan Wu,

Xindong Zhang,

Jie Chen,

Jian Zhang,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Bin and Li, Gehui and Wu, Rongyuan and Zhang, Xindong and Chen, Jie and Zhang, Jian and Zhang, Lei},
    title     = {Adversarial Diffusion Compression for Real-World Image Super-Resolution},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {28208--28220}
}
DiSciPLE: Learning Interpretable Programs for Scientific Visual Discovery: Utkarsh Mall,

Cheng Perng Phoo,

Mia Chiquier,

Bharath Hariharan,

Kavita Bala,

Carl Vondrick; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mall_2025_CVPR,
    author    = {Mall, Utkarsh and Phoo, Cheng Perng and Chiquier, Mia and Hariharan, Bharath and Bala, Kavita and Vondrick, Carl},
    title     = {{DiSciPLE}: Learning Interpretable Programs for Scientific Visual Discovery},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {29258--29267}
}
SOLAMI: Social Vision-Language-Action Modeling for Immersive Interaction with 3D Autonomous Characters: Jianping Jiang,

Weiye Xiao,

Zhengyu Lin,

Huaizhong Zhang,

Tianxiang Ren,

Yang Gao,

Zhiqian Lin,

Zhongang Cai,

Lei Yang,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
    author    = {Jiang, Jianping and Xiao, Weiye and Lin, Zhengyu and Zhang, Huaizhong and Ren, Tianxiang and Gao, Yang and Lin, Zhiqian and Cai, Zhongang and Yang, Lei and Liu, Ziwei},
    title     = {{SOLAMI}: Social Vision-Language-Action Modeling for Immersive Interaction with {3D} Autonomous Characters},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {26887--26898}
}
EntropyMark: Towards More Harmless Backdoor Watermark via Entropy-based Constraint for Open-source Dataset Copyright Protection: Ming Sun,

Rui Wang,

Zixuan Zhu,

Lihua Jing,

Yuanfang Guo; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Ming and Wang, Rui and Zhu, Zixuan and Jing, Lihua and Guo, Yuanfang},
  title     = {{EntropyMark}: Towards More Harmless Backdoor Watermark via Entropy-based Constraint for Open-source Dataset Copyright Protection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30692--30701},
}
Adaptive Markup Language Generation for Contextually-Grounded Visual Document Understanding: Han Xiao,

Yina Xie,

Guanxin Tan,

Yinghao Chen,

Rui Hu,

Ke Wang,

Aojun Zhou,

Hao Li,

Hao Shao,

Xudong Lu,

Peng Gao,

Yafei Wen,

Xiaoxin Chen,

Shuai Ren,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Han and Xie, Yina and Tan, Guanxin and Chen, Yinghao and Hu, Rui and Wang, Ke and Zhou, Aojun and Li, Hao and Shao, Hao and Lu, Xudong and Gao, Peng and Wen, Yafei and Chen, Xiaoxin and Ren, Shuai and Li, Hongsheng},
  title     = {Adaptive Markup Language Generation for Contextually-Grounded Visual Document Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29558--29568},
}
BARD-GS: Blur-Aware Reconstruction of Dynamic Scenes via Gaussian Splatting: Yiren Lu,

Yunlai Zhou,

Disheng Liu,

Tuo Liang,

Yu Yin; [pdf]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Yiren and Zhou, Yunlai and Liu, Disheng and Liang, Tuo and Yin, Yu},
  title     = {{BARD-GS}: Blur-Aware Reconstruction of Dynamic Scenes via {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16532--16542},
}
SALAD: Skeleton-aware Latent Diffusion for Text-driven Motion Generation and Editing: Seokhyeon Hong,

Chaelin Kim,

Serin Yoon,

Junghyun Nam,

Sihun Cha,

Junyong Noh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2025_CVPR,
  author    = {Hong, Seokhyeon and Kim, Chaelin and Yoon, Serin and Nam, Junghyun and Cha, Sihun and Noh, Junyong},
  title     = {{SALAD}: Skeleton-aware Latent Diffusion for Text-driven Motion Generation and Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7158--7168},
}
Towards Universal AI-Generated Image Detection by Variational Information Bottleneck Network: Haifeng Zhang,

Qinghui He,

Xiuli Bi,

Weisheng Li,

Bo Liu,

Bin Xiao; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Haifeng and He, Qinghui and Bi, Xiuli and Li, Weisheng and Liu, Bo and Xiao, Bin},
  title     = {Towards Universal {AI}-Generated Image Detection by Variational Information Bottleneck Network},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23828--23837},
}
HSI: A Holistic Style Injector for Arbitrary Style Transfer: Shuhao Zhang,

Hui Kang,

Yang Liu,

Fang Mei,

Hongjuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Shuhao and Kang, Hui and Liu, Yang and Mei, Fang and Li, Hongjuan},
  title     = {{HSI}: A Holistic Style Injector for Arbitrary Style Transfer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23433--23442},
}
LookingGlass: Generative Anamorphoses via Laplacian Pyramid Warping: Pascal Chang,

Sergio Sancho,

Jingwei Tang,

Markus Gross,

Vinicius Azevedo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chang_2025_CVPR,
  author    = {Chang, Pascal and Sancho, Sergio and Tang, Jingwei and Gross, Markus and Azevedo, Vinicius},
  title     = {{LookingGlass}: Generative Anamorphoses via {Laplacian} Pyramid Warping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24--33},
}
V2V3D: View-to-View Denoised 3D Reconstruction for Light Field Microscopy: Jiayin Zhao,

Zhenqi Fu,

Tao Yu,

Hui Qiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Jiayin and Fu, Zhenqi and Yu, Tao and Qiao, Hui},
  title     = {{V2V3D}: View-to-View Denoised {3D} Reconstruction for Light Field Microscopy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26451--26461},
}
DiN: Diffusion Model for Robust Medical VQA with Semantic Noisy Labels: Erjian Guo,

Zhen Zhao,

Zicheng Wang,

Tong Chen,

Yunyi Liu,

Luping Zhou; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Erjian and Zhao, Zhen and Wang, Zicheng and Chen, Tong and Liu, Yunyi and Zhou, Luping},
  title     = {{DiN}: Diffusion Model for Robust Medical {VQA} with Semantic Noisy Labels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14337--14346},
}
Splatter-360: Generalizable 360 Gaussian Splatting for Wide-baseline Panoramic Images: Zheng Chen,

Chenming Wu,

Zhelun Shen,

Chen Zhao,

Weicai Ye,

Haocheng Feng,

Errui Ding,

Song-Hai Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zheng and Wu, Chenming and Shen, Zhelun and Zhao, Chen and Ye, Weicai and Feng, Haocheng and Ding, Errui and Zhang, Song-Hai},
  title     = {{Splatter-360}: Generalizable 360 {Gaussian} Splatting for Wide-baseline Panoramic Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21590--21599},
}
ShowMak3r: Compositional TV Show Reconstruction: Sangmin Kim,

Seunguk Do,

Jaesik Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Sangmin and Do, Seunguk and Park, Jaesik},
  title     = {{ShowMak3r}: Compositional {TV} Show Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {864--874},
}
CADRef: Robust Out-of-Distribution Detection via Class-Aware Decoupled Relative Feature Leveraging: Zhiwei Ling,

Yachen Chang,

Hailiang Zhao,

Xinkui Zhao,

Kingsum Chow,

Shuiguang Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ling_2025_CVPR,
  author    = {Ling, Zhiwei and Chang, Yachen and Zhao, Hailiang and Zhao, Xinkui and Chow, Kingsum and Deng, Shuiguang},
  title     = {{CADRef}: Robust Out-of-Distribution Detection via Class-Aware Decoupled Relative Feature Leveraging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4968--4977},
}
S^3-Face: SSS-Compliant Facial Reflectance Estimation via Diffusion Priors: Xingyu Ren,

Jiankang Deng,

Yuhao Cheng,

Wenhan Zhu,

Yichao Yan,

Xiaokang Yang,

Stefanos Zafeiriou,

Chao Ma; [pdf] [supp]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Xingyu and Deng, Jiankang and Cheng, Yuhao and Zhu, Wenhan and Yan, Yichao and Yang, Xiaokang and Zafeiriou, Stefanos and Ma, Chao},
  title     = {{S$^3$-Face}: {SSS}-Compliant Facial Reflectance Estimation via Diffusion Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16051--16060},
}
FSBench: A Figure Skating Benchmark for Advancing Artistic Sports Understanding: Rong Gao,

Xin Liu,

Zhuozhao Hu,

Bohao Xing,

Baiqiang Xia,

Zitong Yu,

Heikki Kälviäinen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Rong and Liu, Xin and Hu, Zhuozhao and Xing, Bohao and Xia, Baiqiang and Yu, Zitong and K{\"a}lvi{\"a}inen, Heikki},
  title     = {{FSBench}: A Figure Skating Benchmark for Advancing Artistic Sports Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13595--13605},
}
Keep the Balance: A Parameter-Efficient Symmetrical Framework for RGB+X Semantic Segmentation: Jiaxin Cai,

Jingze Su,

Qi Li,

Wenjie Yang,

Shu Wang,

Tiesong Zhao,

Shengfeng He,

Wenxi Liu; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Jiaxin and Su, Jingze and Li, Qi and Yang, Wenjie and Wang, Shu and Zhao, Tiesong and He, Shengfeng and Liu, Wenxi},
  title     = {Keep the Balance: A Parameter-Efficient Symmetrical Framework for {RGB+X} Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10587--10598},
}
VideoDirector: Precise Video Editing via Text-to-Video Models: Yukun Wang,

Longguang Wang,

Zhiyuan Ma,

Qibin Hu,

Kai Xu,

Yulan Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yukun and Wang, Longguang and Ma, Zhiyuan and Hu, Qibin and Xu, Kai and Guo, Yulan},
  title     = {{VideoDirector}: Precise Video Editing via Text-to-Video Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2589--2598},
}
LLM-driven Multimodal and Multi-Identity Listening Head Generation: Peiwen Lai,

Weizhi Zhong,

Yipeng Qin,

Xiaohang Ren,

Baoyuan Wang,

Guanbin Li; [pdf] [supp]
[bibtex]
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Peiwen and Zhong, Weizhi and Qin, Yipeng and Ren, Xiaohang and Wang, Baoyuan and Li, Guanbin},
  title     = {{LLM}-driven Multimodal and Multi-Identity Listening Head Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10656--10666},
}
Towards Understanding How Knowledge Evolves in Large Vision-Language Models: Sudong Wang,

Yunjian Zhang,

Yao Zhu,

Jianing Li,

Zizhe Wang,

Yanwei Liu,

Xiangyang Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Sudong and Zhang, Yunjian and Zhu, Yao and Li, Jianing and Wang, Zizhe and Liu, Yanwei and Ji, Xiangyang},
  title     = {Towards Understanding How Knowledge Evolves in Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29858--29868},
}
A Unified, Resilient, and Explainable Adversarial Patch Detector: Vishesh Kumar,

Akshay Agarwal; [pdf] [supp]
[bibtex]
@InProceedings{Kumar_2025_CVPR,
  author    = {Kumar, Vishesh and Agarwal, Akshay},
  title     = {A Unified, Resilient, and Explainable Adversarial Patch Detector},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30387--30397},
}
VISTA: Enhancing Long-Duration and High-Resolution Video Understanding by Video Spatiotemporal Augmentation: Weiming Ren,

Huan Yang,

Jie Min,

Cong Wei,

Wenhu Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Weiming and Yang, Huan and Min, Jie and Wei, Cong and Chen, Wenhu},
  title     = {{VISTA}: Enhancing Long-Duration and High-Resolution Video Understanding by Video Spatiotemporal Augmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3804--3814},
}
Structured 3D Latents for Scalable and Versatile 3D Generation: Jianfeng Xiang,

Zelong Lv,

Sicheng Xu,

Yu Deng,

Ruicheng Wang,

Bowen Zhang,

Dong Chen,

Xin Tong,

Jiaolong Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2025_CVPR,
  author    = {Xiang, Jianfeng and Lv, Zelong and Xu, Sicheng and Deng, Yu and Wang, Ruicheng and Zhang, Bowen and Chen, Dong and Tong, Xin and Yang, Jiaolong},
  title     = {Structured {3D} Latents for Scalable and Versatile {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21469--21480},
}
GA3CE: Unconstrained 3D Gaze Estimation with Gaze-Aware 3D Context Encoding: Yuki Kawana,

Shintaro Shiba,

Quan Kong,

Norimasa Kobori; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kawana_2025_CVPR,
  author    = {Kawana, Yuki and Shiba, Shintaro and Kong, Quan and Kobori, Norimasa},
  title     = {{GA3CE}: Unconstrained {3D} Gaze Estimation with Gaze-Aware {3D} Context Encoding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3081--3090},
}
Self-Cross Diffusion Guidance for Text-to-Image Synthesis of Similar Subjects: Weimin Qiu,

Jieke Wang,

Meng Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2025_CVPR,
  author    = {Qiu, Weimin and Wang, Jieke and Tang, Meng},
  title     = {Self-Cross Diffusion Guidance for Text-to-Image Synthesis of Similar Subjects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23528--23538},
}
RigGS: Rigging of 3D Gaussians for Modeling Articulated Objects in Videos: Yuxin Yao,

Zhi Deng,

Junhui Hou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2025_CVPR,
  author    = {Yao, Yuxin and Deng, Zhi and Hou, Junhui},
  title     = {{RigGS}: Rigging of {3D} {Gaussians} for Modeling Articulated Objects in Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5592--5601},
}
Noise Modeling in One Hour: Minimizing Preparation Efforts for Self-supervised Low-Light RAW Image Denoising: Feiran Li,

Haiyang Jiang,

Daisuke Iso; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Feiran and Jiang, Haiyang and Iso, Daisuke},
  title     = {Noise Modeling in One Hour: Minimizing Preparation Efforts for Self-supervised Low-Light {RAW} Image Denoising},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5699--5708},
}
Adv-CPG: A Customized Portrait Generation Framework with Facial Adversarial Attacks: Junying Wang,

Hongyuan Zhang,

Yuan Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Junying and Zhang, Hongyuan and Yuan, Yuan},
  title     = {{Adv-CPG}: A Customized Portrait Generation Framework with Facial Adversarial Attacks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21001--21010},
}
Fish-Vista: A Multi-Purpose Dataset for Understanding & Identification of Traits from Images: Kazi Sajeed Mehrab,

M. Maruf,

Arka Daw,

Abhilash Neog,

Harish Babu Manogaran,

Mridul Khurana,

Zhenyang Feng,

Bahadir Altintas,

Yasin Bakis,

Elizabeth G Campolongo,

Matthew J Thompson,

Xiaojun Wang,

Hilmar Lapp,

Tanya Berger-Wolf,

Paula Mabee,

Henry Bart,

Wei-Lun Chao,

Wasila M Dahdul,

Anuj Karpatne; [pdf] [supp]
[bibtex]
@InProceedings{Mehrab_2025_CVPR,
  author    = {Mehrab, Kazi Sajeed and Maruf, M. and Daw, Arka and Neog, Abhilash and Manogaran, Harish Babu and Khurana, Mridul and Feng, Zhenyang and Altintas, Bahadir and Bakis, Yasin and Campolongo, Elizabeth G and Thompson, Matthew J and Wang, Xiaojun and Lapp, Hilmar and Berger-Wolf, Tanya and Mabee, Paula and Bart, Henry and Chao, Wei-Lun and Dahdul, Wasila M and Karpatne, Anuj},
  title     = {{Fish-Vista}: A Multi-Purpose Dataset for Understanding \& Identification of Traits from Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24275--24285},
}
High Dynamic Range Video Compression: A Large-Scale Benchmark Dataset and A Learned Bit-depth Scalable Compression Algorithm: Zhaoyi Tian,

Feifeng Wang,

Shiwei Wang,

Zihao Zhou,

Yao Zhu,

Liquan Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Zhaoyi and Wang, Feifeng and Wang, Shiwei and Zhou, Zihao and Zhu, Yao and Shen, Liquan},
  title     = {High Dynamic Range Video Compression: A Large-Scale Benchmark Dataset and A Learned Bit-depth Scalable Compression Algorithm},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7320--7330},
}
OffsetOPT: Explicit Surface Reconstruction without Normals: Huan Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2025_CVPR,
  author    = {Lei, Huan},
  title     = {{OffsetOPT}: Explicit Surface Reconstruction without Normals},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11729--11738},
}
PCM : Picard Consistency Model for Fast Parallel Sampling of Diffusion Models: Junhyuk So,

Jiwoong Shin,

Chaeyeon Jang,

Eunhyeok Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{So_2025_CVPR,
  author    = {So, Junhyuk and Shin, Jiwoong and Jang, Chaeyeon and Park, Eunhyeok},
  title     = {{PCM} : {Picard} Consistency Model for Fast Parallel Sampling of Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23313--23322},
}
CoMapGS: Covisibility Map-based Gaussian Splatting for Sparse Novel View Synthesis: Youngkyoon Jang,

Eduardo Pérez-Pellitero; [pdf] [supp]
[bibtex]
@InProceedings{Jang_2025_CVPR,
  author    = {Jang, Youngkyoon and P{\'e}rez-Pellitero, Eduardo},
  title     = {{CoMapGS}: Covisibility Map-based {Gaussian} Splatting for Sparse Novel View Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26779--26788},
}
Any-Resolution AI-Generated Image Detection by Spectral Learning: Dimitrios Karageorgiou,

Symeon Papadopoulos,

Ioannis Kompatsiaris,

Efstratios Gavves; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karageorgiou_2025_CVPR,
  author    = {Karageorgiou, Dimitrios and Papadopoulos, Symeon and Kompatsiaris, Ioannis and Gavves, Efstratios},
  title     = {Any-Resolution {AI}-Generated Image Detection by Spectral Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18706--18717},
}
DivPrune: Diversity-based Visual Token Pruning for Large Multimodal Models: Saeed Ranjbar Alvar,

Gursimran Singh,

Mohammad Akbari,

Yong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alvar_2025_CVPR,
  author    = {Alvar, Saeed Ranjbar and Singh, Gursimran and Akbari, Mohammad and Zhang, Yong},
  title     = {{DivPrune}: Diversity-based Visual Token Pruning for Large Multimodal Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9392--9401},
}
Training Data Provenance Verification: Did Your Model Use Synthetic Data from My Generative Model for Training?: Yuechen Xie,

Jie Song,

Huiqiong Wang,

Mingli Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Yuechen and Song, Jie and Wang, Huiqiong and Song, Mingli},
  title     = {Training Data Provenance Verification: Did Your Model Use Synthetic Data from My Generative Model for Training?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23817--23827},
}
3D-AVS: LiDAR-based 3D Auto-Vocabulary Segmentation: Weijie Wei,

Osman Ülger,

Fatemeh Karimi Nejadasl,

Theo Gevers,

Martin R. Oswald; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Weijie and {\"U}lger, Osman and Nejadasl, Fatemeh Karimi and Gevers, Theo and Oswald, Martin R.},
  title     = {{3D-AVS}: {LiDAR}-based {3D} Auto-Vocabulary Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8910--8920},
}
STOP: Integrated Spatial-Temporal Dynamic Prompting for Video Understanding: Zichen Liu,

Kunlun Xu,

Bing Su,

Xu Zou,

Yuxin Peng,

Jiahuan Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zichen and Xu, Kunlun and Su, Bing and Zou, Xu and Peng, Yuxin and Zhou, Jiahuan},
  title     = {{STOP}: Integrated Spatial-Temporal Dynamic Prompting for Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13776--13786},
}
TimeTracker: Event-based Continuous Point Tracking for Video Frame Interpolation with Non-linear Motion: Haoyue Liu,

Jinghan Xu,

Yi Chang,

Hanyu Zhou,

Haozhi Zhao,

Lin Wang,

Luxin Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Haoyue and Xu, Jinghan and Chang, Yi and Zhou, Hanyu and Zhao, Haozhi and Wang, Lin and Yan, Luxin},
  title     = {{TimeTracker}: Event-based Continuous Point Tracking for Video Frame Interpolation with Non-linear Motion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17649--17659},
}
Improving the Training of Data-Efficient GANs via Quality Aware Dynamic Discriminator Rejection Sampling: Zhaoyu Zhang,

Yang Hua,

Guanxiong Sun,

Hui Wang,

Seán McLoone; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhaoyu and Hua, Yang and Sun, Guanxiong and Wang, Hui and McLoone, Se{\'a}n},
  title     = {Improving the Training of Data-Efficient {GANs} via Quality Aware Dynamic Discriminator Rejection Sampling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30682--30691},
}
Shading Meets Motion: Self-supervised Indoor 3D Reconstruction Via Simultaneous Shape-from-Shading and Structure-from-Motion: Guoyu Lu; [pdf]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Guoyu},
  title     = {Shading Meets Motion: Self-supervised Indoor {3D} Reconstruction Via Simultaneous Shape-from-Shading and Structure-from-Motion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16508--16519},
}
Believing is Seeing: Unobserved Object Detection using Generative Models: Subhransu S. Bhattacharjee,

Dylan Campbell,

Rahul Shome; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bhattacharjee_2025_CVPR,
  author    = {Bhattacharjee, Subhransu S. and Campbell, Dylan and Shome, Rahul},
  title     = {Believing is Seeing: Unobserved Object Detection using Generative Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19366--19377},
}
MotionStone: Decoupled Motion Intensity Modulation with Diffusion Transformer for Image-to-Video Generation: Shuwei Shi,

Biao Gong,

Xi Chen,

Dandan Zheng,

Shuai Tan,

Zizheng Yang,

Yuyuan Li,

Jingwen He,

Kecheng Zheng,

Jingdong Chen,

Ming Yang,

Yinqiang Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Shuwei and Gong, Biao and Chen, Xi and Zheng, Dandan and Tan, Shuai and Yang, Zizheng and Li, Yuyuan and He, Jingwen and Zheng, Kecheng and Chen, Jingdong and Yang, Ming and Zheng, Yinqiang},
  title     = {{MotionStone}: Decoupled Motion Intensity Modulation with Diffusion Transformer for Image-to-Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22864--22874},
}
NLPrompt: Noise-Label Prompt Learning for Vision-Language Models: Bikang Pan,

Qun Li,

Xiaoying Tang,

Wei Huang,

Zhen Fang,

Feng Liu,

Jingya Wang,

Jingyi Yu,

Ye Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2025_CVPR,
  author    = {Pan, Bikang and Li, Qun and Tang, Xiaoying and Huang, Wei and Fang, Zhen and Liu, Feng and Wang, Jingya and Yu, Jingyi and Shi, Ye},
  title     = {{NLPrompt}: Noise-Label Prompt Learning for Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19963--19973},
}
MEGA: Masked Generative Autoencoder for Human Mesh Recovery: Guénolé Fiche,

Simon Leglaive,

Xavier Alameda-Pineda,

Francesc Moreno-Noguer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fiche_2025_CVPR,
  author    = {Fiche, Gu{\'e}nol{\'e} and Leglaive, Simon and Alameda-Pineda, Xavier and Moreno-Noguer, Francesc},
  title     = {{MEGA}: Masked Generative Autoencoder for Human Mesh Recovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5366--5378},
}
PBR-NeRF: Inverse Rendering with Physics-Based Neural Fields: Sean Wu,

Shamik Basu,

Tim Broedermann,

Luc Van Gool,

Christos Sakaridis; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Sean and Basu, Shamik and Broedermann, Tim and Van Gool, Luc and Sakaridis, Christos},
  title     = {{PBR-NeRF}: Inverse Rendering with Physics-Based Neural Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10974--10984},
}
Disentangling Safe and Unsafe Image Corruptions via Anisotropy and Locality: Ramchandran Muthukumar,

Ambar Pal,

Jeremias Sulam,

Rene Vidal; [pdf] [supp]
[bibtex]
@InProceedings{Muthukumar_2025_CVPR,
  author    = {Muthukumar, Ramchandran and Pal, Ambar and Sulam, Jeremias and Vidal, Rene},
  title     = {Disentangling Safe and Unsafe Image Corruptions via Anisotropy and Locality},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9954--9963},
}
Prometheus: 3D-Aware Latent Diffusion Models for Feed-Forward Text-to-3D Scene Generation: Yuanbo Yang,

Jiahao Shao,

Xinyang Li,

Yujun Shen,

Andreas Geiger,

Yiyi Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Yuanbo and Shao, Jiahao and Li, Xinyang and Shen, Yujun and Geiger, Andreas and Liao, Yiyi},
  title     = {Prometheus: {3D}-Aware Latent Diffusion Models for Feed-Forward Text-to-{3D} Scene Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2857--2869},
}
No Pains, More Gains: Recycling Sub-Salient Patches for Efficient High-Resolution Image Recognition: Rong Qin,

Xin Liu,

Xingyu Liu,

Jiaxuan Liu,

Jinglei Shi,

Liang Lin,

Jufeng Yang; [pdf]
[bibtex]
@InProceedings{Qin_2025_CVPR,
  author    = {Qin, Rong and Liu, Xin and Liu, Xingyu and Liu, Jiaxuan and Shi, Jinglei and Lin, Liang and Yang, Jufeng},
  title     = {No Pains, More Gains: Recycling Sub-Salient Patches for Efficient High-Resolution Image Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14965--14975},
}
SphereUFormer: A U-Shaped Transformer for Spherical 360 Perception: Yaniv Benny,

Lior Wolf; [pdf] [supp]
[bibtex]
@InProceedings{Benny_2025_CVPR,
  author    = {Benny, Yaniv and Wolf, Lior},
  title     = {{SphereUFormer}: A {U-Shaped} Transformer for Spherical 360 Perception},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {940--950},
}
Advancing Generalizable Tumor Segmentation with Anomaly-Aware Open-Vocabulary Attention Maps and Frozen Foundation Diffusion Models: Yankai Jiang,

Peng Zhang,

Donglin Yang,

Yuan Tian,

Hai Lin,

Xiaosong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Yankai and Zhang, Peng and Yang, Donglin and Tian, Yuan and Lin, Hai and Wang, Xiaosong},
  title     = {Advancing Generalizable Tumor Segmentation with Anomaly-Aware Open-Vocabulary Attention Maps and Frozen Foundation Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25971--25981},
}
Towards Generalizable Scene Change Detection: Jae-Woo Kim,

Ue-Hwan Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Jae-Woo and Kim, Ue-Hwan},
  title     = {Towards Generalizable Scene Change Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24463--24473},
}
Beyond Clean Training Data: A Versatile and Model-Agnostic Framework for Out-of-Distribution Detection with Contaminated Training Data: Yuchuan Li,

Jae-Mo Kang,

Il-Min Kim; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yuchuan and Kang, Jae-Mo and Kim, Il-Min},
  title     = {Beyond Clean Training Data: A Versatile and Model-Agnostic Framework for Out-of-Distribution Detection with Contaminated Training Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10183--10192},
}
Incomplete Multi-modal Brain Tumor Segmentation via Learnable Sorting State Space Model: Zheyu Zhang,

Yayuan Lu,

Feipeng Ma,

Yueyi Zhang,

Huanjing Yue,

Xiaoyan Sun; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zheyu and Lu, Yayuan and Ma, Feipeng and Zhang, Yueyi and Yue, Huanjing and Sun, Xiaoyan},
  title     = {Incomplete Multi-modal Brain Tumor Segmentation via Learnable Sorting State Space Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25982--25992},
}
FedAWA: Adaptive Optimization of Aggregation Weights in Federated Learning Using Client Vectors: Changlong Shi,

He Zhao,

Bingjie Zhang,

Mingyuan Zhou,

Dandan Guo,

Yi Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Changlong and Zhao, He and Zhang, Bingjie and Zhou, Mingyuan and Guo, Dandan and Chang, Yi},
  title     = {{FedAWA}: Adaptive Optimization of Aggregation Weights in Federated Learning Using Client Vectors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30651--30660},
}
FreeUV: Ground-Truth-Free Realistic Facial UV Texture Recovery via Cross-Assembly Inference Strategy: Xingchao Yang,

Takafumi Taketomi,

Yuki Endo,

Yoshihiro Kanamori; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Xingchao and Taketomi, Takafumi and Endo, Yuki and Kanamori, Yoshihiro},
  title     = {{FreeUV}: Ground-Truth-Free Realistic Facial {UV} Texture Recovery via Cross-Assembly Inference Strategy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {326--337},
}
HarmonySet: A Comprehensive Dataset for Understanding Video-Music Semantic Alignment and Temporal Synchronization: Zitang Zhou,

Ke Mei,

Yu Lu,

Tianyi Wang,

Fengyun Rao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Zitang and Mei, Ke and Lu, Yu and Wang, Tianyi and Rao, Fengyun},
  title     = {{HarmonySet}: A Comprehensive Dataset for Understanding Video-Music Semantic Alignment and Temporal Synchronization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3152--3162},
}
Rethinking Diffusion for Text-Driven Human Motion Generation: Redundant Representations, Evaluation, and Masked Autoregression: Zichong Meng,

Yiming Xie,

Xiaogang Peng,

Zeyu Han,

Huaizu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meng_2025_CVPR,
  author    = {Meng, Zichong and Xie, Yiming and Peng, Xiaogang and Han, Zeyu and Jiang, Huaizu},
  title     = {Rethinking Diffusion for Text-Driven Human Motion Generation: Redundant Representations, Evaluation, and Masked Autoregression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27859--27871},
}
StyleMaster: Stylize Your Video with Artistic Generation and Translation: Zixuan Ye,

Huijuan Huang,

Xintao Wang,

Pengfei Wan,

Di Zhang,

Wenhan Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Zixuan and Huang, Huijuan and Wang, Xintao and Wan, Pengfei and Zhang, Di and Luo, Wenhan},
  title     = {{StyleMaster}: Stylize Your Video with Artistic Generation and Translation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2630--2640},
}
Unsupervised Continual Domain Shift Learning with Multi-Prototype Modeling: Haopeng Sun,

Yingwei Zhang,

Lumin Xu,

Sheng Jin,

Ping Luo,

Chen Qian,

Wentao Liu,

Yiqiang Chen; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Haopeng and Zhang, Yingwei and Xu, Lumin and Jin, Sheng and Luo, Ping and Qian, Chen and Liu, Wentao and Chen, Yiqiang},
  title     = {Unsupervised Continual Domain Shift Learning with Multi-Prototype Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10131--10141},
}
OmniGuard: Hybrid Manipulation Localization via Augmented Versatile Deep Image Watermarking: Xuanyu Zhang,

Zecheng Tang,

Zhipei Xu,

Runyi Li,

Youmin Xu,

Bin Chen,

Feng Gao,

Jian Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xuanyu and Tang, Zecheng and Xu, Zhipei and Li, Runyi and Xu, Youmin and Chen, Bin and Gao, Feng and Zhang, Jian},
  title     = {{OmniGuard}: Hybrid Manipulation Localization via Augmented Versatile Deep Image Watermarking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3008--3018},
}
Open-Canopy: Towards Very High Resolution Forest Monitoring: Fajwel Fogel,

Yohann Perron,

Nikola Besic,

Laurent Saint-André,

Agnès Pellissier-Tanon,

Martin Schwartz,

Thomas Boudras,

Ibrahim Fayad,

Alexandre d'Aspremont,

Loic Landrieu,

Philippe Ciais; [pdf] [supp]
[bibtex]
@InProceedings{Fogel_2025_CVPR,
  author    = {Fogel, Fajwel and Perron, Yohann and Besic, Nikola and Saint-Andr{\'e}, Laurent and Pellissier-Tanon, Agn{\`e}s and Schwartz, Martin and Boudras, Thomas and Fayad, Ibrahim and d'Aspremont, Alexandre and Landrieu, Loic and Ciais, Philippe},
  title     = {{Open-Canopy}: Towards Very High Resolution Forest Monitoring},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1395--1406},
}
ClearSight: Visual Signal Enhancement for Object Hallucination Mitigation in Multimodal Large Language Models: Hao Yin,

Guangzong Si,

Zilei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Hao and Si, Guangzong and Wang, Zilei},
  title     = {{ClearSight}: Visual Signal Enhancement for Object Hallucination Mitigation in Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14625--14634},
}
Stretching Each Dollar: Diffusion Training from Scratch on a Micro-Budget: Vikash Sehwag,

Xianghao Kong,

Jingtao Li,

Michael Spranger,

Lingjuan Lyu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sehwag_2025_CVPR,
  author    = {Sehwag, Vikash and Kong, Xianghao and Li, Jingtao and Spranger, Michael and Lyu, Lingjuan},
  title     = {Stretching Each Dollar: Diffusion Training from Scratch on a Micro-Budget},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28596--28608},
}
Guiding Human-Object Interactions with Rich Geometry and Relations: Mengqing Xue,

Yifei Liu,

Ling Guo,

Shaoli Huang,

Changxing Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2025_CVPR,
  author    = {Xue, Mengqing and Liu, Yifei and Guo, Ling and Huang, Shaoli and Ding, Changxing},
  title     = {Guiding Human-Object Interactions with Rich Geometry and Relations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22714--22723},
}
TacoDepth: Towards Efficient Radar-Camera Depth Estimation with One-stage Fusion: Yiran Wang,

Jiaqi Li,

Chaoyi Hong,

Ruibo Li,

Liusheng Sun,

Xiao Song,

Zhe Wang,

Zhiguo Cao,

Guosheng Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yiran and Li, Jiaqi and Hong, Chaoyi and Li, Ruibo and Sun, Liusheng and Song, Xiao and Wang, Zhe and Cao, Zhiguo and Lin, Guosheng},
  title     = {{TacoDepth}: Towards Efficient Radar-Camera Depth Estimation with One-stage Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10523--10533},
}
Physical Plausibility-aware Trajectory Prediction via Locomotion Embodiment: Hiromu Taketsugu,

Takeru Oba,

Takahiro Maeda,

Shohei Nobuhara,

Norimichi Ukita; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Taketsugu_2025_CVPR,
  author    = {Taketsugu, Hiromu and Oba, Takeru and Maeda, Takahiro and Nobuhara, Shohei and Ukita, Norimichi},
  title     = {Physical Plausibility-aware Trajectory Prediction via Locomotion Embodiment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12324--12334},
}
CADDreamer: CAD Object Generation from Single-view Images: Yuan Li,

Cheng Lin,

Yuan Liu,

Xiaoxiao Long,

Chenxu Zhang,

Ningna Wang,

Xin Li,

Wenping Wang,

Xiaohu Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yuan and Lin, Cheng and Liu, Yuan and Long, Xiaoxiao and Zhang, Chenxu and Wang, Ningna and Li, Xin and Wang, Wenping and Guo, Xiaohu},
  title     = {{CADDreamer}: {CAD} Object Generation from Single-view Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21448--21457},
}
Vision-Language Model IP Protection via Prompt-based Learning: Lianyu Wang,

Meng Wang,

Huazhu Fu,

Daoqiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Lianyu and Wang, Meng and Fu, Huazhu and Zhang, Daoqiang},
  title     = {Vision-Language Model {IP} Protection via Prompt-based Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9497--9506},
}
Where's the Liability in the Generative Era? Recovery-based Black-Box Detection of AI-Generated Content: Haoyue Bai,

Yiyou Sun,

Wei Cheng,

Haifeng Chen; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2025_CVPR,
  author    = {Bai, Haoyue and Sun, Yiyou and Cheng, Wei and Chen, Haifeng},
  title     = {Where's the Liability in the Generative Era? {Recovery-based} Black-Box Detection of {AI}-Generated Content},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28821--28830},
}
Kiss3DGen: Repurposing Image Diffusion Models for 3D Asset Generation: Jiantao Lin,

Xin Yang,

Meixi Chen,

Yingjie Xu,

Dongyu Yan,

Leyi Wu,

Xinli Xu,

Lie Xu,

Shunsi Zhang,

Ying-Cong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Jiantao and Yang, Xin and Chen, Meixi and Xu, Yingjie and Yan, Dongyu and Wu, Leyi and Xu, Xinli and Xu, Lie and Zhang, Shunsi and Chen, Ying-Cong},
  title     = {{Kiss3DGen}: Repurposing Image Diffusion Models for {3D} Asset Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5870--5880},
}
DiTASK: Multi-Task Fine-Tuning with Diffeomorphic Transformations: Krishna Sri Ipsit Mantri,

Carola-Bibiane Schönlieb,

Bruno Ribeiro,

Chaim Baskin,

Moshe Eliasof; [pdf] [supp]
[bibtex]
@InProceedings{Mantri_2025_CVPR,
  author    = {Mantri, Krishna Sri Ipsit and Sch{\"o}nlieb, Carola-Bibiane and Ribeiro, Bruno and Baskin, Chaim and Eliasof, Moshe},
  title     = {{DiTASK}: Multi-Task Fine-Tuning with Diffeomorphic Transformations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25218--25229},
}
OW-OVD: Unified Open World and Open Vocabulary Object Detection: Xing Xi,

Yangyang Huang,

Ronghua Luo,

Yu Qiu; [pdf] [supp]
[bibtex]
@InProceedings{Xi_2025_CVPR,
  author    = {Xi, Xing and Huang, Yangyang and Luo, Ronghua and Qiu, Yu},
  title     = {{OW-OVD}: Unified Open World and Open Vocabulary Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25454--25464},
}
Improving Diffusion Inverse Problem Solving with Decoupled Noise Annealing: Bingliang Zhang,

Wenda Chu,

Julius Berner,

Chenlin Meng,

Anima Anandkumar,

Yang Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Bingliang and Chu, Wenda and Berner, Julius and Meng, Chenlin and Anandkumar, Anima and Song, Yang},
  title     = {Improving Diffusion Inverse Problem Solving with Decoupled Noise Annealing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20895--20905},
}
AvatarArtist: Open-Domain 4D Avatarization: Hongyu Liu,

Xuan Wang,

Ziyu Wan,

Yue Ma,

Jingye Chen,

Yanbo Fan,

Yujun Shen,

Yibing Song,

Qifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Hongyu and Wang, Xuan and Wan, Ziyu and Ma, Yue and Chen, Jingye and Fan, Yanbo and Shen, Yujun and Song, Yibing and Chen, Qifeng},
  title     = {{AvatarArtist}: Open-Domain {4D} Avatarization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10758--10769},
}
DesignDiffusion: High-Quality Text-to-Design Image Generation with Diffusion Models: Zhendong Wang,

Jianmin Bao,

Shuyang Gu,

Dong Chen,

Wengang Zhou,

Houqiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhendong and Bao, Jianmin and Gu, Shuyang and Chen, Dong and Zhou, Wengang and Li, Houqiang},
  title     = {{DesignDiffusion}: High-Quality Text-to-Design Image Generation with Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20906--20915},
}
Using Powerful Prior Knowledge of Diffusion Model in Deep Unfolding Networks for Image Compressive Sensing: Chen Liao,

Yan Shen,

Dan Li,

Zhongli Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_CVPR,
  author    = {Liao, Chen and Shen, Yan and Li, Dan and Wang, Zhongli},
  title     = {Using Powerful Prior Knowledge of Diffusion Model in Deep Unfolding Networks for Image Compressive Sensing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18000--18010},
}
Koala-36M: A Large-scale Video Dataset Improving Consistency between Fine-grained Conditions and Video Content: Qiuheng Wang,

Yukai Shi,

Jiarong Ou,

Rui Chen,

Ke Lin,

Jiahao Wang,

Boyuan Jiang,

Haotian Yang,

Mingwu Zheng,

Xin Tao,

Fei Yang,

Pengfei Wan,

Di Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Qiuheng and Shi, Yukai and Ou, Jiarong and Chen, Rui and Lin, Ke and Wang, Jiahao and Jiang, Boyuan and Yang, Haotian and Zheng, Mingwu and Tao, Xin and Yang, Fei and Wan, Pengfei and Zhang, Di},
  title     = {{Koala-36M}: A Large-scale Video Dataset Improving Consistency between Fine-grained Conditions and Video Content},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8428--8437},
}
VASparse: Towards Efficient Visual Hallucination Mitigation via Visual-Aware Token Sparsification: Xianwei Zhuang,

Zhihong Zhu,

Yuxin Xie,

Liming Liang,

Yuexian Zou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuang_2025_CVPR,
  author    = {Zhuang, Xianwei and Zhu, Zhihong and Xie, Yuxin and Liang, Liming and Zou, Yuexian},
  title     = {{VASparse}: Towards Efficient Visual Hallucination Mitigation via Visual-Aware Token Sparsification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4189--4199},
}
SPARC: Score Prompting and Adaptive Fusion for Zero-Shot Multi-Label Recognition in Vision-Language Models: Kevin Miller,

Aditya Gangrade,

Samarth Mishra,

Kate Saenko,

Venkatesh Saligrama; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Miller_2025_CVPR,
  author    = {Miller, Kevin and Gangrade, Aditya and Mishra, Samarth and Saenko, Kate and Saligrama, Venkatesh},
  title     = {{SPARC}: Score Prompting and Adaptive Fusion for Zero-Shot Multi-Label Recognition in Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4313--4321},
}
UniGoal: Towards Universal Zero-shot Goal-oriented Navigation: Hang Yin,

Xiuwei Xu,

Linqing Zhao,

Ziwei Wang,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Hang and Xu, Xiuwei and Zhao, Linqing and Wang, Ziwei and Zhou, Jie and Lu, Jiwen},
  title     = {{UniGoal}: Towards Universal Zero-shot Goal-oriented Navigation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19057--19066},
}
Noise-Consistent Siamese-Diffusion for Medical Image Synthesis and Segmentation: Kunpeng Qiu,

Zhiqiang Gao,

Zhiying Zhou,

Mingjie Sun,

Yongxin Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2025_CVPR,
  author    = {Qiu, Kunpeng and Gao, Zhiqiang and Zhou, Zhiying and Sun, Mingjie and Guo, Yongxin},
  title     = {Noise-Consistent {Siamese-Diffusion} for Medical Image Synthesis and Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15672--15681},
}
DefectFill: Realistic Defect Generation with Inpainting Diffusion Model for Visual Inspection: Jaewoo Song,

Daemin Park,

Kanghyun Baek,

Sangyub Lee,

Jooyoung Choi,

Eunji Kim,

Sungroh Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_CVPR,
  author    = {Song, Jaewoo and Park, Daemin and Baek, Kanghyun and Lee, Sangyub and Choi, Jooyoung and Kim, Eunji and Yoon, Sungroh},
  title     = {{DefectFill}: Realistic Defect Generation with Inpainting Diffusion Model for Visual Inspection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18718--18727},
}
Less is More: Efficient Image Vectorization with Adaptive Parameterization: Kaibo Zhao,

Liang Bao,

Yufei Li,

Xu Su,

Ke Zhang,

Xiaotian Qiao; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Kaibo and Bao, Liang and Li, Yufei and Su, Xu and Zhang, Ke and Qiao, Xiaotian},
  title     = {Less is More: Efficient Image Vectorization with Adaptive Parameterization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18166--18175},
}
FedMIA: An Effective Membership Inference Attack Exploiting "All for One" Principle in Federated Learning: Gongxi Zhu,

Donghao Li,

Hanlin Gu,

Yuan Yao,

Lixin Fan,

Yuxing Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Gongxi and Li, Donghao and Gu, Hanlin and Yao, Yuan and Fan, Lixin and Han, Yuxing},
  title     = {{FedMIA}: An Effective Membership Inference Attack Exploiting {``All for One''} Principle in Federated Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20643--20653},
}
Erase Diffusion: Empowering Object Removal Through Calibrating Diffusion Pathways: Yi Liu,

Hao Zhou,

Benlei Cui,

Wenxiang Shang,

Ran Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yi and Zhou, Hao and Cui, Benlei and Shang, Wenxiang and Lin, Ran},
  title     = {Erase Diffusion: Empowering Object Removal Through Calibrating Diffusion Pathways},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2418--2427},
}
Prompt-CAM: Making Vision Transformers Interpretable for Fine-Grained Analysis: Arpita Chowdhury,

Dipanjyoti Paul,

Zheda Mai,

Jianyang Gu,

Ziheng Zhang,

Kazi Sajeed Mehrab,

Elizabeth G. Campolongo,

Daniel Rubenstein,

Charles V. Stewart,

Anuj Karpatne,

Tanya Berger-Wolf,

Yu Su,

Wei-Lun Chao; [pdf] [supp]
[bibtex]
@InProceedings{Chowdhury_2025_CVPR,
  author    = {Chowdhury, Arpita and Paul, Dipanjyoti and Mai, Zheda and Gu, Jianyang and Zhang, Ziheng and Mehrab, Kazi Sajeed and Campolongo, Elizabeth G. and Rubenstein, Daniel and Stewart, Charles V. and Karpatne, Anuj and Berger-Wolf, Tanya and Su, Yu and Chao, Wei-Lun},
  title     = {{Prompt-CAM}: Making Vision Transformers Interpretable for Fine-Grained Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4375--4385},
}
Instruction-based Image Manipulation by Watching How Things Move: Mingdeng Cao,

Xuaner Zhang,

Yinqiang Zheng,

Zhihao Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Mingdeng and Zhang, Xuaner and Zheng, Yinqiang and Xia, Zhihao},
  title     = {Instruction-based Image Manipulation by Watching How Things Move},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2704--2713},
}
DPFlow: Adaptive Optical Flow Estimation with a Dual-Pyramid Framework: Henrique Morimitsu,

Xiaobin Zhu,

Roberto M. Cesar,

Xiangyang Ji,

Xu-Cheng Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Morimitsu_2025_CVPR,
  author    = {Morimitsu, Henrique and Zhu, Xiaobin and Cesar, Roberto M. and Ji, Xiangyang and Yin, Xu-Cheng},
  title     = {{DPFlow}: Adaptive Optical Flow Estimation with a Dual-Pyramid Framework},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17810--17820},
}
DocSAM: Unified Document Image Segmentation via Query Decomposition and Heterogeneous Mixed Learning: Xiao-Hui Li,

Fei Yin,

Cheng-Lin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xiao-Hui and Yin, Fei and Liu, Cheng-Lin},
  title     = {{DocSAM}: Unified Document Image Segmentation via Query Decomposition and Heterogeneous Mixed Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15021--15032},
}
Ferret: An Efficient Online Continual Learning Framework under Varying Memory Constraints: Yuhao Zhou,

Yuxin Tian,

Jindi Lv,

Mingjia Shi,

Yuanxi Li,

Qing Ye,

Shuhao Zhang,

Jiancheng Lv; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Yuhao and Tian, Yuxin and Lv, Jindi and Shi, Mingjia and Li, Yuanxi and Ye, Qing and Zhang, Shuhao and Lv, Jiancheng},
  title     = {Ferret: An Efficient Online Continual Learning Framework under Varying Memory Constraints},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4850--4861},
}
Spatiotemporal Skip Guidance for Enhanced Video Diffusion Sampling: Junha Hyung,

Kinam Kim,

Susung Hong,

Min-Jung Kim,

Jaegul Choo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hyung_2025_CVPR,
  author    = {Hyung, Junha and Kim, Kinam and Hong, Susung and Kim, Min-Jung and Choo, Jaegul},
  title     = {Spatiotemporal Skip Guidance for Enhanced Video Diffusion Sampling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11006--11015},
}
VidComposition: Can MLLMs Analyze Compositions in Compiled Videos?: Yunlong Tang,

Junjia Guo,

Hang Hua,

Susan Liang,

Mingqian Feng,

Xinyang Li,

Rui Mao,

Chao Huang,

Jing Bi,

Zeliang Zhang,

Pooyan Fazli,

Chenliang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Yunlong and Guo, Junjia and Hua, Hang and Liang, Susan and Feng, Mingqian and Li, Xinyang and Mao, Rui and Huang, Chao and Bi, Jing and Zhang, Zeliang and Fazli, Pooyan and Xu, Chenliang},
  title     = {{VidComposition}: Can {MLLMs} Analyze Compositions in Compiled Videos?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8490--8500},
}
SAR3D: Autoregressive 3D Object Generation and Understanding via Multi-scale 3D VQVAE: Yongwei Chen,

Yushi Lan,

Shangchen Zhou,

Tengfei Wang,

Xingang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Yongwei and Lan, Yushi and Zhou, Shangchen and Wang, Tengfei and Pan, Xingang},
  title     = {{SAR3D}: Autoregressive {3D} Object Generation and Understanding via Multi-scale {3D} {VQVAE}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28371--28382},
}
Dual-Interrelated Diffusion Model for Few-Shot Anomaly Image Generation: Ying Jin,

Jinlong Peng,

Qingdong He,

Teng Hu,

Jiafu Wu,

Hao Chen,

Haoxuan Wang,

Wenbing Zhu,

Mingmin Chi,

Jun Liu,

Yabiao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2025_CVPR,
  author    = {Jin, Ying and Peng, Jinlong and He, Qingdong and Hu, Teng and Wu, Jiafu and Chen, Hao and Wang, Haoxuan and Zhu, Wenbing and Chi, Mingmin and Liu, Jun and Wang, Yabiao},
  title     = {Dual-Interrelated Diffusion Model for Few-Shot Anomaly Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30420--30429},
}
ODE: Open-Set Evaluation of Hallucinations in Multimodal Large Language Models: Yahan Tu,

Rui Hu,

Jitao Sang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tu_2025_CVPR,
  author    = {Tu, Yahan and Hu, Rui and Sang, Jitao},
  title     = {{ODE}: Open-Set Evaluation of Hallucinations in Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19836--19845},
}
Self-Supervised Learning for Color Spike Camera Reconstruction: Yanchen Dong,

Ruiqin Xiong,

Xiaopeng Fan,

Zhaofei Yu,

Yonghong Tian,

Tiejun Huang; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Yanchen and Xiong, Ruiqin and Fan, Xiaopeng and Yu, Zhaofei and Tian, Yonghong and Huang, Tiejun},
  title     = {Self-Supervised Learning for Color Spike Camera Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6231--6240},
}
Interactive Medical Image Analysis with Concept-based Similarity Reasoning: Ta Duc Huy,

Sen Kim Tran,

Phan Nguyen,

Nguyen Hoang Tran,

Tran Bao Sam,

Anton van den Hengel,

Zhibin Liao,

Johan W. Verjans,

Minh-Son To,

Vu Minh Hieu Phan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huy_2025_CVPR,
  author    = {Huy, Ta Duc and Tran, Sen Kim and Nguyen, Phan and Tran, Nguyen Hoang and Sam, Tran Bao and van den Hengel, Anton and Liao, Zhibin and Verjans, Johan W. and To, Minh-Son and Phan, Vu Minh Hieu},
  title     = {Interactive Medical Image Analysis with Concept-based Similarity Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30797--30806},
}
From Elements to Design: A Layered Approach for Automatic Graphic Design Composition: Jiawei Lin,

Shizhao Sun,

Danqing Huang,

Ting Liu,

Ji Li,

Jiang Bian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Jiawei and Sun, Shizhao and Huang, Danqing and Liu, Ting and Li, Ji and Bian, Jiang},
  title     = {From Elements to Design: A Layered Approach for Automatic Graphic Design Composition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8128--8137},
}
h-Edit: Effective and Flexible Diffusion-Based Editing via Doob's h-Transform: Toan Nguyen,

Kien Do,

Duc Kieu,

Thin Nguyen; [pdf] [supp]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Toan and Do, Kien and Kieu, Duc and Nguyen, Thin},
  title     = {{h-Edit}: Effective and Flexible Diffusion-Based Editing via {Doob's} h-Transform},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28490--28501},
}
Masking meets Supervision: A Strong Learning Alliance: Byeongho Heo,

Taekyung Kim,

Sangdoo Yun,

Dongyoon Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Heo_2025_CVPR,
  author    = {Heo, Byeongho and Kim, Taekyung and Yun, Sangdoo and Han, Dongyoon},
  title     = {Masking meets Supervision: A Strong Learning Alliance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20447--20457},
}
DI-PCG: Diffusion-based Efficient Inverse Procedural Content Generation for High-quality 3D Asset Creation: Wang Zhao,

Yan-Pei Cao,

Jiale Xu,

Yuejiang Dong,

Ying Shan; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Wang and Cao, Yan-Pei and Xu, Jiale and Dong, Yuejiang and Shan, Ying},
  title     = {{DI-PCG}: Diffusion-based Efficient Inverse Procedural Content Generation for High-quality {3D} Asset Creation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11061--11072},
}
SALOVA: Segment-Augmented Long Video Assistant for Targeted Retrieval and Routing in Long-Form Video Analysis: Junho Kim,

Hyunjun Kim,

Hosu Lee,

Yong Man Ro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Junho and Kim, Hyunjun and Lee, Hosu and Ro, Yong Man},
  title     = {{SALOVA}: Segment-Augmented Long Video Assistant for Targeted Retrieval and Routing in Long-Form Video Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3352--3362},
}
Notes-guided MLLM Reasoning: Enhancing MLLM with Knowledge and Visual Notes for Visual Question Answering: Wenlong Fang,

Qiaofeng Wu,

Jing Chen,

Yun Xue; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Wenlong and Wu, Qiaofeng and Chen, Jing and Xue, Yun},
  title     = {Notes-guided {MLLM} Reasoning: Enhancing {MLLM} with Knowledge and Visual Notes for Visual Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19597--19607},
}
Are Spatial-Temporal Graph Convolution Networks for Human Action Recognition Over-Parameterized?: Jianyang Xie,

Yitian Zhao,

Yanda Meng,

He Zhao,

Anh Nguyen,

Yalin Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Jianyang and Zhao, Yitian and Meng, Yanda and Zhao, He and Nguyen, Anh and Zheng, Yalin},
  title     = {Are Spatial-Temporal Graph Convolution Networks for Human Action Recognition Over-Parameterized?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24309--24319},
}
DA-VPT: Semantic-Guided Visual Prompt Tuning for Vision Transformers: Li Ren,

Chen Chen,

Liqiang Wang,

Kien Hua; [pdf] [supp]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Li and Chen, Chen and Wang, Liqiang and Hua, Kien},
  title     = {{DA-VPT}: Semantic-Guided Visual Prompt Tuning for Vision Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4353--4363},
}
Towards Lossless Implicit Neural Representation via Bit Plane Decomposition: Woo Kyoung Han,

Byeonghun Lee,

Hyunmin Cho,

Sunghoon Im,

Kyong Hwan Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Woo Kyoung and Lee, Byeonghun and Cho, Hyunmin and Im, Sunghoon and Jin, Kyong Hwan},
  title     = {Towards Lossless Implicit Neural Representation via Bit Plane Decomposition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2269--2278},
}
Spectral State Space Model for Rotation-Invariant Visual Representation Learning: Sahar Dastani,

Ali Bahri,

Moslem Yazdanpanah,

Mehrdad Noori,

David Osowiechi,

Gustavo Adolfo Vargas Hakim,

Farzad Beizaee,

Milad Cheraghalikhani,

Arnab Kumar Mondal,

Herve Lombaert,

Christian Desrosiers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dastani_2025_CVPR,
  author    = {Dastani, Sahar and Bahri, Ali and Yazdanpanah, Moslem and Noori, Mehrdad and Osowiechi, David and Hakim, Gustavo Adolfo Vargas and Beizaee, Farzad and Cheraghalikhani, Milad and Mondal, Arnab Kumar and Lombaert, Herve and Desrosiers, Christian},
  title     = {Spectral State Space Model for Rotation-Invariant Visual Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23881--23890},
}
iSegMan: Interactive Segment-and-Manipulate 3D Gaussians: Yian Zhao,

Wanshi Xu,

Ruochong Zheng,

Pengchong Qiao,

Chang Liu,

Jie Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Yian and Xu, Wanshi and Zheng, Ruochong and Qiao, Pengchong and Liu, Chang and Chen, Jie},
  title     = {{iSegMan}: Interactive Segment-and-Manipulate {3D} {Gaussians}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {661--670},
}
BlueLM-V-3B: Algorithm and System Co-Design for Multimodal Large Language Models on Mobile Devices: Xudong Lu,

Yinghao Chen,

Cheng Chen,

Hui Tan,

Boheng Chen,

Yina Xie,

Rui Hu,

Guanxin Tan,

Renshou Wu,

Yan Hu,

Yi Zeng,

Lei Wu,

Liuyang Bian,

Zhaoxiong Wang,

Long Liu,

Yanzhou Yang,

Han Xiao,

Aojun Zhou,

Yafei Wen,

Xiaoxin Chen,

Shuai Ren,

Hongsheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Xudong and Chen, Yinghao and Chen, Cheng and Tan, Hui and Chen, Boheng and Xie, Yina and Hu, Rui and Tan, Guanxin and Wu, Renshou and Hu, Yan and Zeng, Yi and Wu, Lei and Bian, Liuyang and Wang, Zhaoxiong and Liu, Long and Yang, Yanzhou and Xiao, Han and Zhou, Aojun and Wen, Yafei and Chen, Xiaoxin and Ren, Shuai and Li, Hongsheng},
  title     = {{BlueLM-V-3B}: Algorithm and System Co-Design for Multimodal Large Language Models on Mobile Devices},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4145--4155},
}
Unraveling Normal Anatomy via Fluid-Driven Anomaly Randomization: Peirong Liu,

Ana Lawry Aguila,

Juan E. Iglesias; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Peirong and Aguila, Ana Lawry and Iglesias, Juan E.},
  title     = {Unraveling Normal Anatomy via Fluid-Driven Anomaly Randomization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10455--10465},
}
Taming Teacher Forcing for Masked Autoregressive Video Generation: Deyu Zhou,

Quan Sun,

Yuang Peng,

Kun Yan,

Runpei Dong,

Duomin Wang,

Zheng Ge,

Nan Duan,

Xiangyu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Deyu and Sun, Quan and Peng, Yuang and Yan, Kun and Dong, Runpei and Wang, Duomin and Ge, Zheng and Duan, Nan and Zhang, Xiangyu},
  title     = {Taming Teacher Forcing for Masked Autoregressive Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7374--7384},
}
UniRestore: Unified Perceptual and Task-Oriented Image Restoration Model Using Diffusion Prior: I-Hsiang Chen,

Wei-Ting Chen,

Yu-Wei Liu,

Yuan-Chun Chiang,

Sy-Yen Kuo,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, I-Hsiang and Chen, Wei-Ting and Liu, Yu-Wei and Chiang, Yuan-Chun and Kuo, Sy-Yen and Yang, Ming-Hsuan},
    title     = {{UniRestore}: Unified Perceptual and Task-Oriented Image Restoration Model Using Diffusion Prior},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {17969--17979},
}
Sharp-It: A Multi-view to Multi-view Diffusion Model for 3D Synthesis and Manipulation: Yiftach Edelstein,

Or Patashnik,

Dana Cohen-Bar,

Lihi Zelnik-Manor; [pdf] [supp]
[bibtex]
@InProceedings{Edelstein_2025_CVPR,
    author    = {Edelstein, Yiftach and Patashnik, Or and Cohen-Bar, Dana and Zelnik-Manor, Lihi},
    title     = {{Sharp-It}: A Multi-view to Multi-view Diffusion Model for {3D} Synthesis and Manipulation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {21458--21468},
}
URWKV: Unified RWKV Model with Multi-state Perspective for Low-light Image Restoration: Rui Xu,

Yuzhen Niu,

Yuezhou Li,

Huangbiao Xu,

Wenxi Liu,

Yuzhong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
    author    = {Xu, Rui and Niu, Yuzhen and Li, Yuezhou and Xu, Huangbiao and Liu, Wenxi and Chen, Yuzhong},
    title     = {{URWKV}: Unified {RWKV} Model with Multi-state Perspective for Low-light Image Restoration},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {21267--21276},
}
Revisiting Backdoor Attacks against Large Vision-Language Models from Domain Shift: Siyuan Liang,

Jiawei Liang,

Tianyu Pang,

Chao Du,

Aishan Liu,

Mingli Zhu,

Xiaochun Cao,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
    author    = {Liang, Siyuan and Liang, Jiawei and Pang, Tianyu and Du, Chao and Liu, Aishan and Zhu, Mingli and Cao, Xiaochun and Tao, Dacheng},
    title     = {Revisiting Backdoor Attacks against Large Vision-Language Models from Domain Shift},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {9477--9486},
}
Condensing Action Segmentation Datasets via Generative Network Inversion: Guodong Ding,

Rongyu Chen,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2025_CVPR,
    author    = {Ding, Guodong and Chen, Rongyu and Yao, Angela},
    title     = {Condensing Action Segmentation Datasets via Generative Network Inversion},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {17733--17742},
}
TCFG: Tangential Damping Classifier-free Guidance: Mingi Kwon,

Shin seong Kim,

Jaeseok Jeong,

Yi Ting Hsiao,

Youngjung Uh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kwon_2025_CVPR,
    author    = {Kwon, Mingi and Kim, Shin seong and Jeong, Jaeseok and Hsiao, Yi Ting and Uh, Youngjung},
    title     = {{TCFG}: Tangential Damping Classifier-free Guidance},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {2620--2629},
}
MatAnyone: Stable Video Matting with Consistent Memory Propagation: Peiqing Yang,

Shangchen Zhou,

Jixin Zhao,

Qingyi Tao,

Chen Change Loy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
    author    = {Yang, Peiqing and Zhou, Shangchen and Zhao, Jixin and Tao, Qingyi and Loy, Chen Change},
    title     = {{MatAnyone}: Stable Video Matting with Consistent Memory Propagation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {7299--7308},
}
Can Generative Video Models Help Pose Estimation?: Ruojin Cai,

Jason Y. Zhang,

Philipp Henzler,

Zhengqi Li,

Noah Snavely,

Ricardo Martin-Brualla; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2025_CVPR,
    author    = {Cai, Ruojin and Zhang, Jason Y. and Henzler, Philipp and Li, Zhengqi and Snavely, Noah and Martin-Brualla, Ricardo},
    title     = {Can Generative Video Models Help Pose Estimation?},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {16764--16773},
}
Molmo and PixMo: Open Weights and Open Data for State-of-the-Art Vision-Language Models: Matt Deitke,

Christopher Clark,

Sangho Lee,

Rohun Tripathi,

Yue Yang,

Jae Sung Park,

Mohammadreza Salehi,

Niklas Muennighoff,

Kyle Lo,

Luca Soldaini,

Jiasen Lu,

Taira Anderson,

Erin Bransom,

Kiana Ehsani,

Huong Ngo,

YenSung Chen,

Ajay Patel,

Mark Yatskar,

Chris Callison-Burch,

Andrew Head,

Rose Hendrix,

Favyen Bastani,

Eli VanderBilt,

Nathan Lambert,

Yvonne Chou,

Arnavi Chheda,

Jenna Sparks,

Sam Skjonsberg,

Michael Schmitz,

Aaron Sarnat,

Byron Bischoff,

Pete Walsh,

Chris Newell,

Piper Wolters,

Tanmay Gupta,

Kuo-Hao Zeng,

Jon Borchardt,

Dirk Groeneveld,

Crystal Nam,

Sophie Lebrecht,

Caitlin Wittlif,

Carissa Schoenick,

Oscar Michel,

Ranjay Krishna,

Luca Weihs,

Noah A. Smith,

Hannaneh Hajishirzi,

Ross Girshick,

Ali Farhadi,

Aniruddha Kembhavi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deitke_2025_CVPR,
    author    = {Deitke, Matt and Clark, Christopher and Lee, Sangho and Tripathi, Rohun and Yang, Yue and Park, Jae Sung and Salehi, Mohammadreza and Muennighoff, Niklas and Lo, Kyle and Soldaini, Luca and Lu, Jiasen and Anderson, Taira and Bransom, Erin and Ehsani, Kiana and Ngo, Huong and Chen, YenSung and Patel, Ajay and Yatskar, Mark and Callison-Burch, Chris and Head, Andrew and Hendrix, Rose and Bastani, Favyen and VanderBilt, Eli and Lambert, Nathan and Chou, Yvonne and Chheda, Arnavi and Sparks, Jenna and Skjonsberg, Sam and Schmitz, Michael and Sarnat, Aaron and Bischoff, Byron and Walsh, Pete and Newell, Chris and Wolters, Piper and Gupta, Tanmay and Zeng, Kuo-Hao and Borchardt, Jon and Groeneveld, Dirk and Nam, Crystal and Lebrecht, Sophie and Wittlif, Caitlin and Schoenick, Carissa and Michel, Oscar and Krishna, Ranjay and Weihs, Luca and Smith, Noah A. and Hajishirzi, Hannaneh and Girshick, Ross and Farhadi, Ali and Kembhavi, Aniruddha},
    title     = {{Molmo} and {PixMo}: Open Weights and Open Data for State-of-the-Art Vision-Language Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {91--104},
}
DriveGPT4-V2: Harnessing Large Language Model Capabilities for Enhanced Closed-Loop Autonomous Driving: Zhenhua Xu,

Yan Bai,

Yujia Zhang,

Zhuoling Li,

Fei Xia,

Kwan-Yee K. Wong,

Jianqiang Wang,

Hengshuang Zhao; [pdf]
[bibtex]
@InProceedings{Xu_2025_CVPR,
    author    = {Xu, Zhenhua and Bai, Yan and Zhang, Yujia and Li, Zhuoling and Xia, Fei and Wong, Kwan-Yee K. and Wang, Jianqiang and Zhao, Hengshuang},
    title     = {{DriveGPT4-V2}: Harnessing Large Language Model Capabilities for Enhanced Closed-Loop Autonomous Driving},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {17261--17270},
}
High-Fidelity Lightweight Mesh Reconstruction from Point Clouds: Chen Zhang,

Wentao Wang,

Ximeng Li,

Xinyao Liao,

Wanjuan Su,

Wenbing Tao; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Chen and Wang, Wentao and Li, Ximeng and Liao, Xinyao and Su, Wanjuan and Tao, Wenbing},
    title     = {High-Fidelity Lightweight Mesh Reconstruction from Point Clouds},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {11739--11748},
}
MDP: Multidimensional Vision Model Pruning with Latency Constraint: Xinglong Sun,

Barath Lakshmanan,

Maying Shen,

Shiyi Lan,

Jingde Chen,

Jose M. Alvarez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
    author    = {Sun, Xinglong and Lakshmanan, Barath and Shen, Maying and Lan, Shiyi and Chen, Jingde and Alvarez, Jose M.},
    title     = {{MDP}: Multidimensional Vision Model Pruning with Latency Constraint},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {20113--20123},
}
OSDFace: One-Step Diffusion Model for Face Restoration: Jingkai Wang,

Jue Gong,

Lin Zhang,

Zheng Chen,

Xing Liu,

Hong Gu,

Yutong Liu,

Yulun Zhang,

Xiaokang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Jingkai and Gong, Jue and Zhang, Lin and Chen, Zheng and Liu, Xing and Gu, Hong and Liu, Yutong and Zhang, Yulun and Yang, Xiaokang},
    title     = {{OSDFace}: One-Step Diffusion Model for Face Restoration},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {12626--12636},
}
Task Singular Vectors: Reducing Task Interference in Model Merging: Antonio Andrea Gargiulo,

Donato Crisostomi,

Maria Sofia Bucarelli,

Simone Scardapane,

Fabrizio Silvestri,

Emanuele Rodolà; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gargiulo_2025_CVPR,
    author    = {Gargiulo, Antonio Andrea and Crisostomi, Donato and Bucarelli, Maria Sofia and Scardapane, Simone and Silvestri, Fabrizio and Rodol{\`a}, Emanuele},
    title     = {Task Singular Vectors: Reducing Task Interference in Model Merging},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {18695--18705},
}
Functionality Understanding and Segmentation in 3D Scenes: Jaime Corsetti,

Francesco Giuliari,

Alice Fasoli,

Davide Boscaini,

Fabio Poiesi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Corsetti_2025_CVPR,
    author    = {Corsetti, Jaime and Giuliari, Francesco and Fasoli, Alice and Boscaini, Davide and Poiesi, Fabio},
    title     = {Functionality Understanding and Segmentation in {3D} Scenes},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {24550--24559},
}
Dragin3D: Image Editing by Dragging in 3D Space: Weiran Guang,

Xiaoguang Gu,

Mengqi Huang,

Zhendong Mao; [pdf] [supp]
[bibtex]
@InProceedings{Guang_2025_CVPR,
    author    = {Guang, Weiran and Gu, Xiaoguang and Huang, Mengqi and Mao, Zhendong},
    title     = {{Dragin3D}: Image Editing by Dragging in {3D} Space},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {21502--21512},
}
MMTL-UniAD: A Unified Framework for Multimodal and Multi-Task Learning in Assistive Driving Perception: Wenzhuo Liu,

Wenshuo Wang,

Yicheng Qiao,

Qiannan Guo,

Jiayin Zhu,

Pengfei Li,

Zilong Chen,

Huiming Yang,

Zhiwei Li,

Lening Wang,

Tiao Tan,

Huaping Liu; [pdf]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Wenzhuo and Wang, Wenshuo and Qiao, Yicheng and Guo, Qiannan and Zhu, Jiayin and Li, Pengfei and Chen, Zilong and Yang, Huiming and Li, Zhiwei and Wang, Lening and Tan, Tiao and Liu, Huaping},
    title     = {{MMTL-UniAD}: A Unified Framework for Multimodal and Multi-Task Learning in Assistive Driving Perception},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {6864--6874},
}
T2V-CompBench: A Comprehensive Benchmark for Compositional Text-to-video Generation: Kaiyue Sun,

Kaiyi Huang,

Xian Liu,

Yue Wu,

Zihan Xu,

Zhenguo Li,

Xihui Liu; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
    author    = {Sun, Kaiyue and Huang, Kaiyi and Liu, Xian and Wu, Yue and Xu, Zihan and Li, Zhenguo and Liu, Xihui},
    title     = {{T2V-CompBench}: A Comprehensive Benchmark for Compositional Text-to-video Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {8406--8416},
}
Self-Evolving Visual Concept Library using Vision-Language Critics: Atharva Sehgal,

Patrick Yuan,

Ziniu Hu,

Yisong Yue,

Jennifer J. Sun,

Swarat Chaudhuri; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sehgal_2025_CVPR,
    author    = {Sehgal, Atharva and Yuan, Patrick and Hu, Ziniu and Yue, Yisong and Sun, Jennifer J. and Chaudhuri, Swarat},
    title     = {Self-Evolving Visual Concept Library using Vision-Language Critics},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {13124--13134},
}
Multimodal Autoregressive Pre-training of Large Vision Encoders: Enrico Fini,

Mustafa Shukor,

Xiujun Li,

Philipp Dufter,

Michal Klein,

David Haldimann,

Sai Aitharaju,

Victor G. Turrisi da Costa,

Louis Béthune,

Zhe Gan,

Alexander Toshev,

Marcin Eichner,

Moin Nabi,

Yinfei Yang,

Joshua Susskind,

Alaaeldin El-Nouby; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fini_2025_CVPR,
    author    = {Fini, Enrico and Shukor, Mustafa and Li, Xiujun and Dufter, Philipp and Klein, Michal and Haldimann, David and Aitharaju, Sai and da Costa, Victor G. Turrisi and B{\'e}thune, Louis and Gan, Zhe and Toshev, Alexander and Eichner, Marcin and Nabi, Moin and Yang, Yinfei and Susskind, Joshua and El-Nouby, Alaaeldin},
    title     = {Multimodal Autoregressive Pre-training of Large Vision Encoders},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {9641--9654},
}
AKiRa: Augmentation Kit on Rays for Optical Video Generation: Xi Wang,

Robin Courant,

Marc Christie,

Vicky Kalogeiton; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Xi and Courant, Robin and Christie, Marc and Kalogeiton, Vicky},
    title     = {{AKiRa}: Augmentation Kit on Rays for Optical Video Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {2609--2619},
}
Towards Stable and Storage-efficient Dataset Distillation: Matching Convexified Trajectory: Wenliang Zhong,

Haoyu Tang,

Qinghai Zheng,

Mingzhu Xu,

Yupeng Hu,

Weili Guan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_CVPR,
    author    = {Zhong, Wenliang and Tang, Haoyu and Zheng, Qinghai and Xu, Mingzhu and Hu, Yupeng and Guan, Weili},
    title     = {Towards Stable and Storage-efficient Dataset Distillation: Matching Convexified Trajectory},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {25581--25589},
}
TSAM: Temporal SAM Augmented with Multimodal Prompts for Referring Audio-Visual Segmentation: Abduljalil Radman,

Jorma Laaksonen; [pdf] [supp]
[bibtex]
@InProceedings{Radman_2025_CVPR,
    author    = {Radman, Abduljalil and Laaksonen, Jorma},
    title     = {{TSAM}: Temporal {SAM} Augmented with Multimodal Prompts for Referring Audio-Visual Segmentation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {23947--23956},
}
TFCustom: Customized Image Generation with Time-Aware Frequency Feature Guidance: Mushui Liu,

Dong She,

Jingxuan Pang,

Qihan Huang,

Jiacheng Ying,

Wanggui He,

Yuanlei Hou,

Siming Fu; [pdf]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Mushui and She, Dong and Pang, Jingxuan and Huang, Qihan and Ying, Jiacheng and He, Wanggui and Hou, Yuanlei and Fu, Siming},
    title     = {{TFCustom}: Customized Image Generation with Time-Aware Frequency Feature Guidance},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {2714--2723},
}
Boosting Point-Supervised Temporal Action Localization through Integrating Query Reformation and Optimal Transport: Mengnan Liu,

Le Wang,

Sanping Zhou,

Kun Xia,

Xiaolong Sun,

Gang Hua; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Mengnan and Wang, Le and Zhou, Sanping and Xia, Kun and Sun, Xiaolong and Hua, Gang},
    title     = {Boosting Point-Supervised Temporal Action Localization through Integrating Query Reformation and Optimal Transport},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {13865--13875},
}
SketchFusion: Learning Universal Sketch Features through Fusing Foundation Models: Subhadeep Koley,

Tapas Kumar Dutta,

Aneeshan Sain,

Pinaki Nath Chowdhury,

Ayan Kumar Bhunia,

Yi-Zhe Song; [pdf] [arXiv]
[bibtex]
@InProceedings{Koley_2025_CVPR,
    author    = {Koley, Subhadeep and Dutta, Tapas Kumar and Sain, Aneeshan and Chowdhury, Pinaki Nath and Bhunia, Ayan Kumar and Song, Yi-Zhe},
    title     = {{SketchFusion}: Learning Universal Sketch Features through Fusing Foundation Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {2556--2567},
}
Bridging the Vision-Brain Gap with an Uncertainty-Aware Blur Prior: Haitao Wu,

Qing Li,

Changqing Zhang,

Zhen He,

Xiaomin Ying; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
    author    = {Wu, Haitao and Li, Qing and Zhang, Changqing and He, Zhen and Ying, Xiaomin},
    title     = {Bridging the Vision-Brain Gap with an Uncertainty-Aware Blur Prior},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {2246--2257},
}
Invisible Backdoor Attack against Self-supervised Learning: Hanrong Zhang,

Zhenting Wang,

Boheng Li,

Fulin Lin,

Tingxu Han,

Mingyu Jin,

Chenlu Zhan,

Mengnan Du,

Hongwei Wang,

Shiqing Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Hanrong and Wang, Zhenting and Li, Boheng and Lin, Fulin and Han, Tingxu and Jin, Mingyu and Zhan, Chenlu and Du, Mengnan and Wang, Hongwei and Ma, Shiqing},
    title     = {Invisible Backdoor Attack against Self-supervised Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {25790--25801},
}
Perceptually Accurate 3D Talking Head Generation: New Definitions, Speech-Mesh Representation, and Evaluation Metrics: Lee Chae-Yeon,

Oh Hyun-Bin,

Han EunGi,

Kim Sung-Bin,

Suekyeong Nam,

Tae-Hyun Oh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chae-Yeon_2025_CVPR,
    author    = {Lee, Chae-Yeon and Oh, Hyun-Bin and Han, EunGi and Kim, Sung-Bin and Nam, Suekyeong and Oh, Tae-Hyun},
    title     = {Perceptually Accurate {3D} Talking Head Generation: New Definitions, Speech-Mesh Representation, and Evaluation Metrics},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {21065--21074},
}
BWFormer: Building Wireframe Reconstruction from Airborne LiDAR Point Cloud with Transformer: Yuzhou Liu,

Lingjie Zhu,

Hanqiao Ye,

Shangfeng Huang,

Xiang Gao,

Xianwei Zheng,

Shuhan Shen; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Yuzhou and Zhu, Lingjie and Ye, Hanqiao and Huang, Shangfeng and Gao, Xiang and Zheng, Xianwei and Shen, Shuhan},
    title     = {{BWFormer}: Building Wireframe Reconstruction from Airborne {LiDAR} Point Cloud with Transformer},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {22215--22224},
}
Diffusion-4K: Ultra-High-Resolution Image Synthesis with Latent Diffusion Models: Jinjin Zhang,

Qiuyu Huang,

Junjie Liu,

Xiefan Guo,

Di Huang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Jinjin and Huang, Qiuyu and Liu, Junjie and Guo, Xiefan and Huang, Di},
    title     = {{Diffusion-4K}: Ultra-High-Resolution Image Synthesis with Latent Diffusion Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {23464--23473},
}
AffordDP: Generalizable Diffusion Policy with Transferable Affordance: Shijie Wu,

Yihang Zhu,

Yunao Huang,

Kaizhen Zhu,

Jiayuan Gu,

Jingyi Yu,

Ye Shi,

Jingya Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
    author    = {Wu, Shijie and Zhu, Yihang and Huang, Yunao and Zhu, Kaizhen and Gu, Jiayuan and Yu, Jingyi and Shi, Ye and Wang, Jingya},
    title     = {{AffordDP}: Generalizable Diffusion Policy with Transferable Affordance},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {6971--6980},
}
HMAR: Efficient Hierarchical Masked Auto-Regressive Image Generation: Hermann Kumbong,

Xian Liu,

Tsung-Yi Lin,

Ming-Yu Liu,

Xihui Liu,

Ziwei Liu,

Daniel Y. Fu,

Christopher Re,

David W. Romero; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kumbong_2025_CVPR,
    author    = {Kumbong, Hermann and Liu, Xian and Lin, Tsung-Yi and Liu, Ming-Yu and Liu, Xihui and Liu, Ziwei and Fu, Daniel Y. and Re, Christopher and Romero, David W.},
    title     = {{HMAR}: Efficient Hierarchical Masked Auto-Regressive Image Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {2535--2544},
}
OmniDrive: A Holistic Vision-Language Dataset for Autonomous Driving with Counterfactual Reasoning: Shihao Wang,

Zhiding Yu,

Xiaohui Jiang,

Shiyi Lan,

Min Shi,

Nadine Chang,

Jan Kautz,

Ying Li,

Jose M. Alvarez; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Shihao and Yu, Zhiding and Jiang, Xiaohui and Lan, Shiyi and Shi, Min and Chang, Nadine and Kautz, Jan and Li, Ying and Alvarez, Jose M.},
    title     = {{OmniDrive}: A Holistic Vision-Language Dataset for Autonomous Driving with Counterfactual Reasoning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {22442--22452},
}
DKC: Differentiated Knowledge Consolidation for Cloth-Hybrid Lifelong Person Re-identification: Zhenyu Cui,

Jiahuan Zhou,

Yuxin Peng; [pdf] [supp]
[bibtex]
@InProceedings{Cui_2025_CVPR,
    author    = {Cui, Zhenyu and Zhou, Jiahuan and Peng, Yuxin},
    title     = {{DKC}: Differentiated Knowledge Consolidation for Cloth-Hybrid Lifelong Person Re-identification},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {3573--3582},
}
Enhancing Facial Privacy Protection via Weakening Diffusion Purification: Ali Salar,

Qing Liu,

Yingli Tian,

Guoying Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Salar_2025_CVPR,
    author    = {Salar, Ali and Liu, Qing and Tian, Yingli and Zhao, Guoying},
    title     = {Enhancing Facial Privacy Protection via Weakening Diffusion Purification},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {8235--8244},
}
ORIDa: Object-centric Real-world Image Composition Dataset: Jinwoo Kim,

Sangmin Han,

Jinho Jeong,

Jiwoo Choi,

Dongyeoung Kim,

Seon Joo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
    author    = {Kim, Jinwoo and Han, Sangmin and Jeong, Jinho and Choi, Jiwoo and Kim, Dongyeoung and Kim, Seon Joo},
    title     = {{ORIDa}: Object-centric Real-world Image Composition Dataset},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {3051--3060},
}
MeGA: Hybrid Mesh-Gaussian Head Avatar for High-Fidelity Rendering and Head Editing: Cong Wang,

Di Kang,

Heyi Sun,

Shenhan Qian,

Zixuan Wang,

Linchao Bao,

Song-Hai Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Cong and Kang, Di and Sun, Heyi and Qian, Shenhan and Wang, Zixuan and Bao, Linchao and Zhang, Song-Hai},
    title     = {{MeGA}: Hybrid {Mesh-Gaussian} Head Avatar for High-Fidelity Rendering and Head Editing},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {26274--26284},
}
Image Generation Diversity Issues and How to Tame Them: Mischa Dombrowski,

Weitong Zhang,

Sarah Cechnicka,

Hadrien Reynaud,

Bernhard Kainz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dombrowski_2025_CVPR,
    author    = {Dombrowski, Mischa and Zhang, Weitong and Cechnicka, Sarah and Reynaud, Hadrien and Kainz, Bernhard},
    title     = {Image Generation Diversity Issues and How to Tame Them},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {3029--3039},
}
Annotation Ambiguity Aware Semi-Supervised Medical Image Segmentation: Suruchi Kumari,

Pravendra Singh; [pdf] [supp]
[bibtex]
@InProceedings{Kumari_2025_CVPR,
    author    = {Kumari, Suruchi and Singh, Pravendra},
    title     = {Annotation Ambiguity Aware Semi-Supervised Medical Image Segmentation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {10404--10413},
}
Effective Cloud Removal for Remote Sensing Images by an Improved Mean-Reverting Denoising Model with Elucidated Design Space: Yi Liu,

Wengen Li,

Jihong Guan,

Shuigeng Zhou,

Yichao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Yi and Li, Wengen and Guan, Jihong and Zhou, Shuigeng and Zhang, Yichao},
    title     = {Effective Cloud Removal for Remote Sensing Images by an Improved Mean-Reverting Denoising Model with Elucidated Design Space},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {17851--17861},
}
CAP4D: Creating Animatable 4D Portrait Avatars with Morphable Multi-View Diffusion Models: Felix Taubner,

Ruihang Zhang,

Mathieu Tuli,

David B. Lindell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Taubner_2025_CVPR,
    author    = {Taubner, Felix and Zhang, Ruihang and Tuli, Mathieu and Lindell, David B.},
    title     = {{CAP4D}: Creating Animatable {4D} Portrait Avatars with Morphable Multi-View Diffusion Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {5318--5330},
}
Comprehensive Information Bottleneck for Unveiling Universal Attribution to Interpret Vision Transformers: Jung-Ho Hong,

Ho-Joong Kim,

Kyu-Sung Jeon,

Seong-Whan Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2025_CVPR,
    author    = {Hong, Jung-Ho and Kim, Ho-Joong and Jeon, Kyu-Sung and Lee, Seong-Whan},
    title     = {Comprehensive Information Bottleneck for Unveiling Universal Attribution to Interpret Vision Transformers},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {25166--25175},
}
OpticalNet: An Optical Imaging Dataset and Benchmark Beyond the Diffraction Limit: Benquan Wang,

Ruyi An,

Jin-Kyu So,

Sergei Kurdiumov,

Eng Aik Chan,

Giorgio Adamo,

Yuhan Peng,

Yewen Li,

Bo An; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Benquan and An, Ruyi and So, Jin-Kyu and Kurdiumov, Sergei and Chan, Eng Aik and Adamo, Giorgio and Peng, Yuhan and Li, Yewen and An, Bo},
    title     = {{OpticalNet}: An Optical Imaging Dataset and Benchmark Beyond the Diffraction Limit},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {10900--10912},
}
Dataset Distillation with Neural Characteristic Function: A Minmax Perspective: Shaobo Wang,

Yicun Yang,

Zhiyuan Liu,

Chenghao Sun,

Xuming Hu,

Conghui He,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Shaobo and Yang, Yicun and Liu, Zhiyuan and Sun, Chenghao and Hu, Xuming and He, Conghui and Zhang, Linfeng},
    title     = {Dataset Distillation with Neural Characteristic Function: A Minmax Perspective},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {25570--25580},
}
Free-viewpoint Human Animation with Pose-correlated Reference Selection: Fa-Ting Hong,

Zhan Xu,

Haiyang Liu,

Qinjie Lin,

Luchuan Song,

Zhixin Shu,

Yang Zhou,

Duygu Ceylan,

Dan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2025_CVPR,
    author    = {Hong, Fa-Ting and Xu, Zhan and Liu, Haiyang and Lin, Qinjie and Song, Luchuan and Shu, Zhixin and Zhou, Yang and Ceylan, Duygu and Xu, Dan},
    title     = {Free-viewpoint Human Animation with Pose-correlated Reference Selection},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {26253--26262},
}
CORE4D: A 4D Human-Object-Human Interaction Dataset for Collaborative Object REarrangement: Yun Liu,

Chengwen Zhang,

Ruofan Xing,

Bingda Tang,

Bowen Yang,

Li Yi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Yun and Zhang, Chengwen and Xing, Ruofan and Tang, Bingda and Yang, Bowen and Yi, Li},
    title     = {{CORE4D}: A {4D} Human-Object-Human Interaction Dataset for Collaborative Object {REarrangement}},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {1769--1782},
}
PillarHist: A Quantization-aware Pillar Feature Encoder based on Height-aware Histogram: Sifan Zhou,

Zhihang Yuan,

Dawei Yang,

Xing Hu,

Jian Qian,

Ziyu Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
    author    = {Zhou, Sifan and Yuan, Zhihang and Yang, Dawei and Hu, Xing and Qian, Jian and Zhao, Ziyu},
    title     = {{PillarHist}: A Quantization-aware Pillar Feature Encoder based on Height-aware Histogram},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {27336--27345},
}
POp-GS: Next Best View in 3D-Gaussian Splatting with P-Optimality: Joey Wilson,

Marcelino Almeida,

Sachit Mahajan,

Martin Labrie,

Maani Ghaffari,

Omid Ghasemalizadeh,

Min Sun,

Cheng-Hao Kuo,

Arnab Sen; [pdf] [supp]
[bibtex]
@InProceedings{Wilson_2025_CVPR,
    author    = {Wilson, Joey and Almeida, Marcelino and Mahajan, Sachit and Labrie, Martin and Ghaffari, Maani and Ghasemalizadeh, Omid and Sun, Min and Kuo, Cheng-Hao and Sen, Arnab},
    title     = {{POp-GS}: Next Best View in {3D-Gaussian} Splatting with {P-Optimality}},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {3646--3655},
}
Empowering Vector Graphics with Consistently Arbitrary Viewing and View-dependent Visibility: Yidi Li,

Jun Xiao,

Zhengda Lu,

Yiqun Wang,

Haiyong Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Yidi and Xiao, Jun and Lu, Zhengda and Wang, Yiqun and Jiang, Haiyong},
    title     = {Empowering Vector Graphics with Consistently Arbitrary Viewing and View-dependent Visibility},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {18531--18540},
}
Semantic and Expressive Variations in Image Captions Across Languages: Andre Ye,

Sebastin Santy,

Jena D. Hwang,

Amy X. Zhang,

Ranjay Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR,
    author    = {Ye, Andre and Santy, Sebastin and Hwang, Jena D. and Zhang, Amy X. and Krishna, Ranjay},
    title     = {Semantic and Expressive Variations in Image Captions Across Languages},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {29667--29679},
}
ATP-LLaVA: Adaptive Token Pruning for Large Vision Language Models: Xubing Ye,

Yukang Gan,

Yixiao Ge,

Xiao-Ping Zhang,

Yansong Tang; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2025_CVPR,
    author    = {Ye, Xubing and Gan, Yukang and Ge, Yixiao and Zhang, Xiao-Ping and Tang, Yansong},
    title     = {{ATP-LLaVA}: Adaptive Token Pruning for Large Vision Language Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {24972--24982},
}
ADD: Attribution-Driven Data Augmentation Framework for Boosting Image Super-Resolution: Ze-Yu Mi,

Yu-Bin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Mi_2025_CVPR,
  author    = {Mi, Ze-Yu and Yang, Yu-Bin},
  title     = {{ADD}: Attribution-Driven Data Augmentation Framework for Boosting Image Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23101--23110},
}
DIFFER: Disentangling Identity Features via Semantic Cues for Clothes-Changing Person Re-ID: Xin Liang,

Yogesh S Rawat; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Xin and Rawat, Yogesh S.},
  title     = {{DIFFER}: Disentangling Identity Features via Semantic Cues for Clothes-Changing Person {Re-ID}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13980--13989},
}
HyperPose: Hypernetwork-Infused Camera Pose Localization and an Extended Cambridge Landmarks Dataset: Ron Ferens,

Yosi Keller; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ferens_2025_CVPR,
  author    = {Ferens, Ron and Keller, Yosi},
  title     = {{HyperPose}: Hypernetwork-Infused Camera Pose Localization and an Extended {Cambridge Landmarks} Dataset},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11547--11557},
}
Critic-V: VLM Critics Help Catch VLM Errors in Multimodal Reasoning: Di Zhang,

Jingdi Lei,

Junxian Li,

Xunzhi Wang,

Yujie Liu,

Zonglin Yang,

Jiatong Li,

Weida Wang,

Suorong Yang,

Jianbo Wu,

Peng Ye,

Wanli Ouyang,

Dongzhan Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Di and Lei, Jingdi and Li, Junxian and Wang, Xunzhi and Liu, Yujie and Yang, Zonglin and Li, Jiatong and Wang, Weida and Yang, Suorong and Wu, Jianbo and Ye, Peng and Ouyang, Wanli and Zhou, Dongzhan},
  title     = {{Critic-V}: {VLM} Critics Help Catch {VLM} Errors in Multimodal Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9050--9061},
}
Mono3DVLT: Monocular-Video-Based 3D Visual Language Tracking: Hongkai Wei,

Yang Yang,

Shijie Sun,

Mingtao Feng,

Xiangyu Song,

Qi Lei,

Hongli Hu,

Rong Wang,

Huansheng Song,

Naveed Akhtar,

Ajmal Saeed Mian; [pdf]
[bibtex]
@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Hongkai and Yang, Yang and Sun, Shijie and Feng, Mingtao and Song, Xiangyu and Lei, Qi and Hu, Hongli and Wang, Rong and Song, Huansheng and Akhtar, Naveed and Mian, Ajmal Saeed},
  title     = {{Mono3DVLT}: Monocular-Video-Based {3D} Visual Language Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13886--13896},
}
Towards Universal Dataset Distillation via Task-Driven Diffusion: Ding Qi,

Jian Li,

Junyao Gao,

Shuguang Dou,

Ying Tai,

Jianlong Hu,

Bo Zhao,

Yabiao Wang,

Chengjie Wang,

Cairong Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2025_CVPR,
  author    = {Qi, Ding and Li, Jian and Gao, Junyao and Dou, Shuguang and Tai, Ying and Hu, Jianlong and Zhao, Bo and Wang, Yabiao and Wang, Chengjie and Zhao, Cairong},
  title     = {Towards Universal Dataset Distillation via Task-Driven Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10557--10566},
}
Parametric Point Cloud Completion for Polygonal Surface Reconstruction: Zhaiyu Chen,

Yuqing Wang,

Liangliang Nan,

Xiao Xiang Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zhaiyu and Wang, Yuqing and Nan, Liangliang and Zhu, Xiao Xiang},
  title     = {Parametric Point Cloud Completion for Polygonal Surface Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11749--11758},
}
SyncSDE: A Probabilistic Framework for Diffusion Synchronization: Hyunjun Lee,

Hyunsoo Lee,

Sookwan Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Hyunjun and Lee, Hyunsoo and Han, Sookwan},
  title     = {{SyncSDE}: A Probabilistic Framework for Diffusion Synchronization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17508--17517},
}
MaRI: Material Retrieval Integration across Domains: Jianhui Wang,

Zhifei Yang,

Yangfan He,

Huixiong Zhang,

Yuxuan Chen,

Jingwei Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jianhui and Yang, Zhifei and He, Yangfan and Zhang, Huixiong and Chen, Yuxuan and Huang, Jingwei},
  title     = {{MaRI}: Material Retrieval Integration across Domains},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5814--5823},
}
MCCD: Multi-Agent Collaboration-based Compositional Diffusion for Complex Text-to-Image Generation: Mingcheng Li,

Xiaolu Hou,

Ziyang Liu,

Dingkang Yang,

Ziyun Qian,

Jiawei Chen,

Jinjie Wei,

Yue Jiang,

Qingyao Xu,

Lihua Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Mingcheng and Hou, Xiaolu and Liu, Ziyang and Yang, Dingkang and Qian, Ziyun and Chen, Jiawei and Wei, Jinjie and Jiang, Yue and Xu, Qingyao and Zhang, Lihua},
  title     = {{MCCD}: Multi-Agent Collaboration-based Compositional Diffusion for Complex Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13263--13272},
}
Dual Semantic Guidance for Open Vocabulary Semantic Segmentation: Zhengyang Wang,

Tingliang Feng,

Fan Lyu,

Fanhua Shang,

Wei Feng,

Liang Wan; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhengyang and Feng, Tingliang and Lyu, Fan and Shang, Fanhua and Feng, Wei and Wan, Liang},
  title     = {Dual Semantic Guidance for Open Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20212--20222},
}
CroCoDL: Cross-device Collaborative Dataset for Localization: Hermann Blum,

Alessandro Mercurio,

Joshua O'Reilly,

Tim Engelbracht,

Mihai Dusmanu,

Marc Pollefeys,

Zuria Bauer; [pdf] [supp]
[bibtex]
@InProceedings{Blum_2025_CVPR,
  author    = {Blum, Hermann and Mercurio, Alessandro and O'Reilly, Joshua and Engelbracht, Tim and Dusmanu, Mihai and Pollefeys, Marc and Bauer, Zuria},
  title     = {{CroCoDL}: Cross-device Collaborative Dataset for Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27424--27434},
}
Q-Bench-Video: Benchmark the Video Quality Understanding of LMMs: Zicheng Zhang,

Ziheng Jia,

Haoning Wu,

Chunyi Li,

Zijian Chen,

Yingjie Zhou,

Wei Sun,

Xiaohong Liu,

Xiongkuo Min,

Weisi Lin,

Guangtao Zhai; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zicheng and Jia, Ziheng and Wu, Haoning and Li, Chunyi and Chen, Zijian and Zhou, Yingjie and Sun, Wei and Liu, Xiaohong and Min, Xiongkuo and Lin, Weisi and Zhai, Guangtao},
  title     = {{Q-Bench-Video}: Benchmark the Video Quality Understanding of {LMMs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3229--3239},
}
Glossy Object Reconstruction with Cost-effective Polarized Acquisition: Bojian Wu,

Yifan Peng,

Ruizhen Hu,

Xiaowei Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Bojian and Peng, Yifan and Hu, Ruizhen and Zhou, Xiaowei},
  title     = {Glossy Object Reconstruction with Cost-effective Polarized Acquisition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {422--431},
}
Generalizable Object Keypoint Localization from Generative Priors: Dongkai Wang,

Jiang Duan,

Liangjian Wen,

Shiyu Xuan,

Hao Chen,

Shiliang Zhang; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Dongkai and Duan, Jiang and Wen, Liangjian and Xuan, Shiyu and Chen, Hao and Zhang, Shiliang},
  title     = {Generalizable Object Keypoint Localization from Generative Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20265--20274},
}
CLIP is Almost All You Need: Towards Parameter-Efficient Scene Text Retrieval without OCR: Xugong Qin,

Peng Zhang,

Jun Jie Ou Yang,

Gangyan Zeng,

Yubo Li,

Yuanyuan Wang,

Wanqian Zhang,

Pengwen Dai; [pdf] [supp]
[bibtex]
@InProceedings{Qin_2025_CVPR,
  author    = {Qin, Xugong and Zhang, Peng and Yang, Jun Jie Ou and Zeng, Gangyan and Li, Yubo and Wang, Yuanyuan and Zhang, Wanqian and Dai, Pengwen},
  title     = {{CLIP} is Almost All You Need: Towards Parameter-Efficient Scene Text Retrieval without {OCR}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24873--24883},
}
L-SWAG: Layer-Sample Wise Activation with Gradients Information for Zero-Shot NAS on Vision Transformers: Sofia Casarin,

Sergio Escalera,

Oswald Lanz; [pdf] [supp]
[bibtex]
@InProceedings{Casarin_2025_CVPR,
  author    = {Casarin, Sofia and Escalera, Sergio and Lanz, Oswald},
  title     = {{L-SWAG}: Layer-Sample Wise Activation with Gradients Information for Zero-Shot {NAS} on Vision Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4441--4451},
}
Commonsense Video Question Answering through Video-Grounded Entailment Tree Reasoning: Huabin Liu,

Filip Ilievski,

Cees G. M. Snoek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Huabin and Ilievski, Filip and Snoek, Cees G. M.},
  title     = {Commonsense Video Question Answering through Video-Grounded Entailment Tree Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3262--3271},
}
What Makes a Good Dataset for Knowledge Distillation?: Logan Frank,

Jim Davis; [pdf] [arXiv]
[bibtex]
@InProceedings{Frank_2025_CVPR,
  author    = {Frank, Logan and Davis, Jim},
  title     = {What Makes a Good Dataset for Knowledge Distillation?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23755--23764},
}
Lifelong Knowledge Editing for Vision Language Models with Low-Rank Mixture-of-Experts: Qizhou Chen,

Chengyu Wang,

Dakan Wang,

Taolin Zhang,

Wangyue Li,

Xiaofeng He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Qizhou and Wang, Chengyu and Wang, Dakan and Zhang, Taolin and Li, Wangyue and He, Xiaofeng},
  title     = {Lifelong Knowledge Editing for Vision Language Models with Low-Rank Mixture-of-Experts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9455--9466},
}
Rectification-specific Supervision and Constrained Estimator for Online Stereo Rectification: Rui Gong,

Kim-Hui Yap,

Weide Liu,

Xulei Yang,

Jun Cheng; [pdf]
[bibtex]
@InProceedings{Gong_2025_CVPR,
  author    = {Gong, Rui and Yap, Kim-Hui and Liu, Weide and Yang, Xulei and Cheng, Jun},
  title     = {Rectification-specific Supervision and Constrained Estimator for Online Stereo Rectification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22348--22358},
}
Shape and Texture: What Influences Reliable Optical Flow Estimation?: Libo Long,

Xiao Hu,

Jochen Lang; [pdf] [supp]
[bibtex]
@InProceedings{Long_2025_CVPR,
  author    = {Long, Libo and Hu, Xiao and Lang, Jochen},
  title     = {Shape and Texture: What Influences Reliable Optical Flow Estimation?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27894--27903},
}
PartGen: Part-level 3D Generation and Reconstruction with Multi-view Diffusion Models: Minghao Chen,

Roman Shapovalov,

Iro Laina,

Tom Monnier,

Jianyuan Wang,

David Novotny,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Minghao and Shapovalov, Roman and Laina, Iro and Monnier, Tom and Wang, Jianyuan and Novotny, David and Vedaldi, Andrea},
  title     = {{PartGen}: Part-level {3D} Generation and Reconstruction with Multi-view Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5881--5892},
}
FedCALM: Conflict-aware Layer-wise Mitigation for Selective Aggregation in Deeper Personalized Federated Learning: Hao Zheng,

Zhigang Hu,

Liu Yang,

Meiguang Zheng,

Aikun Xu,

Boyu Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Hao and Hu, Zhigang and Yang, Liu and Zheng, Meiguang and Xu, Aikun and Wang, Boyu},
  title     = {{FedCALM}: Conflict-aware Layer-wise Mitigation for Selective Aggregation in Deeper Personalized Federated Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15444--15453},
}
SINR: Sparsity Driven Compressed Implicit Neural Representations: Dhananjaya Jayasundara,

Sudarshan Rajagopalan,

Yasiru Ranasinghe,

Trac D. Tran,

Vishal M. Patel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jayasundara_2025_CVPR,
  author    = {Jayasundara, Dhananjaya and Rajagopalan, Sudarshan and Ranasinghe, Yasiru and Tran, Trac D. and Patel, Vishal M.},
  title     = {{SINR}: Sparsity Driven Compressed Implicit Neural Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3061--3070},
}
CaricatureBooth: Data-Free Interactive Caricature Generation in a Photo Booth: Zhiyu Qu,

Yunqi Miao,

Zhensong Zhang,

Jifei Song,

Jiankang Deng,

Yi-Zhe Song; [pdf] [supp]
[bibtex]
@InProceedings{Qu_2025_CVPR,
  author    = {Qu, Zhiyu and Miao, Yunqi and Zhang, Zhensong and Song, Jifei and Deng, Jiankang and Song, Yi-Zhe},
  title     = {{CaricatureBooth}: Data-Free Interactive Caricature Generation in a Photo Booth},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10815--10824},
}
FlexGS: Train Once, Deploy Everywhere with Many-in-One Flexible 3D Gaussian Splatting: Hengyu Liu,

Yuehao Wang,

Chenxin Li,

Ruisi Cai,

Kevin Wang,

Wuyang Li,

Pavlo Molchanov,

Peihao Wang,

Zhangyang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Hengyu and Wang, Yuehao and Li, Chenxin and Cai, Ruisi and Wang, Kevin and Li, Wuyang and Molchanov, Pavlo and Wang, Peihao and Wang, Zhangyang},
  title     = {{FlexGS}: Train Once, Deploy Everywhere with Many-in-One Flexible {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16336--16345},
}
Generalizing Deepfake Video Detection with Plug-and-Play: Video-Level Blending and Spatiotemporal Adapter Tuning: Zhiyuan Yan,

Yandan Zhao,

Shen Chen,

Mingyi Guo,

Xinghe Fu,

Taiping Yao,

Shouhong Ding,

Yunsheng Wu,

Li Yuan; [pdf]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Zhiyuan and Zhao, Yandan and Chen, Shen and Guo, Mingyi and Fu, Xinghe and Yao, Taiping and Ding, Shouhong and Wu, Yunsheng and Yuan, Li},
  title     = {Generalizing Deepfake Video Detection with Plug-and-Play: Video-Level Blending and Spatiotemporal Adapter Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12615--12625},
}
ManipTrans: Efficient Dexterous Bimanual Manipulation Transfer via Residual Learning: Kailin Li,

Puhao Li,

Tengyu Liu,

Yuyang Li,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Kailin and Li, Puhao and Liu, Tengyu and Li, Yuyang and Huang, Siyuan},
  title     = {{ManipTrans}: Efficient Dexterous Bimanual Manipulation Transfer via Residual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6991--7003},
}
Precise, Fast, and Low-cost Concept Erasure in Value Space: Orthogonal Complement Matters: Yuan Wang,

Ouxiang Li,

Tingting Mu,

Yanbin Hao,

Kuien Liu,

Xiang Wang,

Xiangnan He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuan and Li, Ouxiang and Mu, Tingting and Hao, Yanbin and Liu, Kuien and Wang, Xiang and He, Xiangnan},
  title     = {Precise, Fast, and Low-cost Concept Erasure in Value Space: Orthogonal Complement Matters},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28759--28768},
}
HOIGen-1M: A Large-scale Dataset for Human-Object Interaction Video Generation: Kun Liu,

Qi Liu,

Xinchen Liu,

Jie Li,

Yongdong Zhang,

Jiebo Luo,

Xiaodong He,

Wu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Kun and Liu, Qi and Liu, Xinchen and Li, Jie and Zhang, Yongdong and Luo, Jiebo and He, Xiaodong and Liu, Wu},
  title     = {{HOIGen-1M}: A Large-scale Dataset for Human-Object Interaction Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24001--24010},
}
T2ISafety: Benchmark for Assessing Fairness, Toxicity, and Privacy in Image Generation: Lijun Li,

Zhelun Shi,

Xuhao Hu,

Bowen Dong,

Yiran Qin,

Xihui Liu,

Lu Sheng,

Jing Shao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Lijun and Shi, Zhelun and Hu, Xuhao and Dong, Bowen and Qin, Yiran and Liu, Xihui and Sheng, Lu and Shao, Jing},
  title     = {{T2ISafety}: Benchmark for Assessing Fairness, Toxicity, and Privacy in Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13381--13392},
}
Order-One Rolling Shutter Cameras: Marvin Anas Hahn,

Kathlén Kohn,

Orlando Marigliano,

Tomas Pajdla; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hahn_2025_CVPR,
  author    = {Hahn, Marvin Anas and Kohn, Kathl{\'e}n and Marigliano, Orlando and Pajdla, Tomas},
  title     = {Order-One Rolling Shutter Cameras},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27007--27016},
}
Animate and Sound an Image: Xihua Wang,

Ruihua Song,

Chongxuan Li,

Xin Cheng,

Boyuan Li,

Yihan Wu,

Yuyue Wang,

Hongteng Xu,

Yunfeng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xihua and Song, Ruihua and Li, Chongxuan and Cheng, Xin and Li, Boyuan and Wu, Yihan and Wang, Yuyue and Xu, Hongteng and Wang, Yunfeng},
  title     = {Animate and Sound an Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23369--23378},
}
Shining Yourself: High-Fidelity Ornaments Virtual Try-on with Diffusion Model: Yingmao Miao,

Zhanpeng Huang,

Rui Han,

Zibin Wang,

Chenhao Lin,

Chao Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Miao_2025_CVPR,
  author    = {Miao, Yingmao and Huang, Zhanpeng and Han, Rui and Wang, Zibin and Lin, Chenhao and Shen, Chao},
  title     = {Shining Yourself: High-Fidelity Ornaments Virtual Try-on with Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {359--368},
}
Foveated Instance Segmentation: Hongyi Zeng,

Wenxuan Liu,

Tianhua Xia,

Jinhui Chen,

Ziyun Li,

Sai Qian Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_CVPR,
  author    = {Zeng, Hongyi and Liu, Wenxuan and Xia, Tianhua and Chen, Jinhui and Li, Ziyun and Zhang, Sai Qian},
  title     = {Foveated Instance Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24496--24505},
}
Make It Count: Text-to-Image Generation with an Accurate Number of Objects: Lital Binyamin,

Yoad Tewel,

Hilit Segev,

Eran Hirsch,

Royi Rassin,

Gal Chechik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Binyamin_2025_CVPR,
  author    = {Binyamin, Lital and Tewel, Yoad and Segev, Hilit and Hirsch, Eran and Rassin, Royi and Chechik, Gal},
  title     = {Make It Count: Text-to-Image Generation with an Accurate Number of Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13242--13251},
}
Universal Domain Adaptation for Semantic Segmentation: Seun-An Choe,

Keon-Hee Park,

Jinwoo Choi,

Gyeong-Moon Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choe_2025_CVPR,
  author    = {Choe, Seun-An and Park, Keon-Hee and Choi, Jinwoo and Park, Gyeong-Moon},
  title     = {Universal Domain Adaptation for Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4607--4617},
}
HyperGS: Hyperspectral 3D Gaussian Splatting: Christopher Thirgood,

Oscar Mendez,

Erin Ling,

Jon Storey,

Simon Hadfield; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thirgood_2025_CVPR,
  author    = {Thirgood, Christopher and Mendez, Oscar and Ling, Erin and Storey, Jon and Hadfield, Simon},
  title     = {{HyperGS}: Hyperspectral {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5970--5979},
}
Emphasizing Discriminative Features for Dataset Distillation in Complex Scenarios: Kai Wang,

Zekai Li,

Zhi-Qi Cheng,

Samir Khaki,

Ahmad Sajedi,

Ramakrishna Vedantam,

Konstantinos N Plataniotis,

Alexander Hauptmann,

Yang You; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Kai and Li, Zekai and Cheng, Zhi-Qi and Khaki, Samir and Sajedi, Ahmad and Vedantam, Ramakrishna and Plataniotis, Konstantinos N. and Hauptmann, Alexander and You, Yang},
  title     = {Emphasizing Discriminative Features for Dataset Distillation in Complex Scenarios},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30451--30461},
}
LMO: Linear Mamba Operator for MRI Reconstruction: Wei Li,

Jiawei Jiang,

Jie Wu,

Kaihao Yu,

Jianwei Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Wei and Jiang, Jiawei and Wu, Jie and Yu, Kaihao and Zheng, Jianwei},
  title     = {{LMO}: Linear {Mamba} Operator for {MRI} Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5112--5122},
}
AnomalyNCD: Towards Novel Anomaly Class Discovery in Industrial Scenarios: Ziming Huang,

Xurui Li,

Haotian Liu,

Feng Xue,

Yuzhe Wang,

Yu Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Ziming and Li, Xurui and Liu, Haotian and Xue, Feng and Wang, Yuzhe and Zhou, Yu},
  title     = {{AnomalyNCD}: Towards Novel Anomaly Class Discovery in Industrial Scenarios},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4755--4765},
}
Segment This Thing: Foveated Tokenization for Efficient Point-Prompted Segmentation: Tanner Schmidt,

Richard Newcombe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schmidt_2025_CVPR,
  author    = {Schmidt, Tanner and Newcombe, Richard},
  title     = {Segment This Thing: Foveated Tokenization for Efficient Point-Prompted Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29428--29437},
}
Task-Specific Gradient Adaptation for Few-Shot One-Class Classification: Yunlong Li,

Xiabi Liu,

Liyuan Pan,

Yuchen Ren; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yunlong and Liu, Xiabi and Pan, Liyuan and Ren, Yuchen},
  title     = {Task-Specific Gradient Adaptation for Few-Shot One-Class Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30556--30565},
}
TraF-Align: Trajectory-aware Feature Alignment for Asynchronous Multi-agent Perception: Zhiying Song,

Lei Yang,

Fuxi Wen,

Jun Li; [pdf] [supp]
[bibtex]
@InProceedings{Song_2025_CVPR,
  author    = {Song, Zhiying and Yang, Lei and Wen, Fuxi and Li, Jun},
  title     = {{TraF-Align}: Trajectory-aware Feature Alignment for Asynchronous Multi-agent Perception},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12048--12057},
}
DreamCache: Finetuning-Free Lightweight Personalized Image Generation via Feature Caching: Emanuele Aiello,

Umberto Michieli,

Diego Valsesia,

Mete Ozay,

Enrico Magli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Aiello_2025_CVPR,
  author    = {Aiello, Emanuele and Michieli, Umberto and Valsesia, Diego and Ozay, Mete and Magli, Enrico},
  title     = {{DreamCache}: Finetuning-Free Lightweight Personalized Image Generation via Feature Caching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12480--12489},
}
3D Gaussian Inpainting with Depth-Guided Cross-View Consistency: Sheng-Yu Huang,

Zi-Ting Chou,

Yu-Chiang Frank Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Sheng-Yu and Chou, Zi-Ting and Wang, Yu-Chiang Frank},
  title     = {{3D} {Gaussian} Inpainting with Depth-Guided Cross-View Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26704--26713},
}
Your Large Vision-Language Model Only Needs A Few Attention Heads For Visual Grounding: Seil Kang,

Jinyeong Kim,

Junhyeok Kim,

Seong Jae Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_CVPR,
  author    = {Kang, Seil and Kim, Jinyeong and Kim, Junhyeok and Hwang, Seong Jae},
  title     = {Your Large Vision-Language Model Only Needs A Few Attention Heads For Visual Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9339--9350},
}
FlexUOD: The Answer to Real-world Unsupervised Image Outlier Detection: Zhonghang Liu,

Kun Zhou,

Changshuo Wang,

Wen-Yan Lin,

Jiangbo Lu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhonghang and Zhou, Kun and Wang, Changshuo and Lin, Wen-Yan and Lu, Jiangbo},
  title     = {{FlexUOD}: The Answer to Real-world Unsupervised Image Outlier Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15183--15193},
}
Ges3ViG: Incorporating Pointing Gestures into Language-Based 3D Visual Grounding for Embodied Reference Understanding: Atharv Mahesh Mane,

Dulanga Weerakoon,

Vigneshwaran Subbaraju,

Sougata Sen,

Sanjay E. Sarma,

Archan Misra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mane_2025_CVPR,
  author    = {Mane, Atharv Mahesh and Weerakoon, Dulanga and Subbaraju, Vigneshwaran and Sen, Sougata and Sarma, Sanjay E. and Misra, Archan},
  title     = {{Ges3ViG}: Incorporating Pointing Gestures into Language-Based {3D} Visual Grounding for Embodied Reference Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9017--9026},
}
Focusing on Tracks for Online Multi-Object Tracking: Kyujin Shim,

Kangwook Ko,

Yujin Yang,

Changick Kim; [pdf] [supp]
[bibtex]
@InProceedings{Shim_2025_CVPR,
  author    = {Shim, Kyujin and Ko, Kangwook and Yang, Yujin and Kim, Changick},
  title     = {Focusing on Tracks for Online Multi-Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11687--11696},
}
Floxels: Fast Unsupervised Voxel Based Scene Flow Estimation: David T. Hoffmann,

Syed Haseeb Raza,

Hanqiu Jiang,

Denis Tananaev,

Steffen Klingenhoefer,

Martin Meinke; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hoffmann_2025_CVPR,
  author    = {Hoffmann, David T. and Raza, Syed Haseeb and Jiang, Hanqiu and Tananaev, Denis and Klingenhoefer, Steffen and Meinke, Martin},
  title     = {{Floxels}: Fast Unsupervised Voxel Based Scene Flow Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22328--22337},
}
LiveCC: Learning Video LLM with Streaming Speech Transcription at Scale: Joya Chen,

Ziyun Zeng,

Yiqi Lin,

Wei Li,

Zejun Ma,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Joya and Zeng, Ziyun and Lin, Yiqi and Li, Wei and Ma, Zejun and Shou, Mike Zheng},
  title     = {{LiveCC}: Learning Video {LLM} with Streaming Speech Transcription at Scale},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29083--29095},
}
Identity-preserving Distillation Sampling by Fixed-Point Iterator: SeonHwa Kim,

Jiwon Kim,

Soobin Park,

Donghoon Ahn,

Jiwon Kang,

Seungryong Kim,

Kyong Hwan Jin,

Eunju Cha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, SeonHwa and Kim, Jiwon and Park, Soobin and Ahn, Donghoon and Kang, Jiwon and Kim, Seungryong and Jin, Kyong Hwan and Cha, Eunju},
  title     = {Identity-preserving Distillation Sampling by Fixed-Point Iterator},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11115--11124},
}
Progressive Focused Transformer for Single Image Super-Resolution: Wei Long,

Xingyu Zhou,

Leheng Zhang,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Long_2025_CVPR,
  author    = {Long, Wei and Zhou, Xingyu and Zhang, Leheng and Gu, Shuhang},
  title     = {Progressive Focused Transformer for Single Image Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2279--2288},
}
VladVA: Discriminative Fine-tuning of LVLMs: Yassine Ouali,

Adrian Bulat,

Alexandros Xenos,

Anestis Zaganidis,

Ioannis Maniadis Metaxas,

Brais Martinez,

Georgios Tzimiropoulos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ouali_2025_CVPR,
  author    = {Ouali, Yassine and Bulat, Adrian and Xenos, Alexandros and Zaganidis, Anestis and Metaxas, Ioannis Maniadis and Martinez, Brais and Tzimiropoulos, Georgios},
  title     = {{VladVA}: Discriminative Fine-tuning of {LVLMs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4101--4111},
}
FlexiDiT: Your Diffusion Transformer Can Easily Generate High-Quality Samples with Less Compute: Sotiris Anagnostidis,

Gregor Bachmann,

Yeongmin Kim,

Jonas Kohler,

Markos Georgopoulos,

Artsiom Sanakoyeu,

Yuming Du,

Albert Pumarola,

Ali Thabet,

Edgar Schönfeld; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Anagnostidis_2025_CVPR,
  author    = {Anagnostidis, Sotiris and Bachmann, Gregor and Kim, Yeongmin and Kohler, Jonas and Georgopoulos, Markos and Sanakoyeu, Artsiom and Du, Yuming and Pumarola, Albert and Thabet, Ali and Sch{\"o}nfeld, Edgar},
  title     = {{FlexiDiT}: Your Diffusion Transformer Can Easily Generate High-Quality Samples with Less Compute},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28316--28326},
}
WiLoR: End-to-end 3D Hand Localization and Reconstruction in-the-wild: Rolandos Alexandros Potamias,

Jinglei Zhang,

Jiankang Deng,

Stefanos Zafeiriou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Potamias_2025_CVPR,
  author    = {Potamias, Rolandos Alexandros and Zhang, Jinglei and Deng, Jiankang and Zafeiriou, Stefanos},
  title     = {{WiLoR}: End-to-end {3D} Hand Localization and Reconstruction in-the-wild},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12242--12254},
}
HumanMM: Global Human Motion Recovery from Multi-shot Videos: Yuhong Zhang,

Guanlin Wu,

Ling-Hao Chen,

Zhuokai Zhao,

Jing Lin,

Xiaoke Jiang,

Jiamin Wu,

Zhuoheng Li,

Hao Frank Yang,

Haoqian Wang,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zhang_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yuhong and Wu, Guanlin and Chen, Ling-Hao and Zhao, Zhuokai and Lin, Jing and Jiang, Xiaoke and Wu, Jiamin and Li, Zhuoheng and Yang, Hao Frank and Wang, Haoqian and Zhang, Lei},
  title     = {{HumanMM}: Global Human Motion Recovery from Multi-shot Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1973--1983},
}
Removing Reflections from RAW Photos: Eric Kee,

Adam Pikielny,

Kevin Blackburn-Matzen,

Marc Levoy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kee_2025_CVPR,
  author    = {Kee, Eric and Pikielny, Adam and Blackburn-Matzen, Kevin and Levoy, Marc},
  title     = {Removing Reflections from {RAW} Photos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {161--171},
}
BiomedCoOp: Learning to Prompt for Biomedical Vision-Language Models: Taha Koleilat,

Hojat Asgariandehkordi,

Hassan Rivaz,

Yiming Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koleilat_2025_CVPR,
  author    = {Koleilat, Taha and Asgariandehkordi, Hojat and Rivaz, Hassan and Xiao, Yiming},
  title     = {{BiomedCoOp}: Learning to Prompt for Biomedical Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14766--14776},
}
AMR-Transformer: Enabling Efficient Long-range Interaction for Complex Neural Fluid Simulation: Zeyi Xu,

Jinfan Liu,

Kuangxu Chen,

Ye Chen,

Zhangli Hu,

Bingbing Ni; [pdf]
[bibtex]
% NOTE(review): key Xu_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Zeyi and Liu, Jinfan and Chen, Kuangxu and Chen, Ye and Hu, Zhangli and Ni, Bingbing},
  title     = {{AMR-Transformer}: Enabling Efficient Long-range Interaction for Complex Neural Fluid Simulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5804--5813},
}
MV-SSM: Multi-View State Space Modeling for 3D Human Pose Estimation: Aviral Chharia,

Wenbo Gou,

Haoye Dong; [pdf] [supp]
[bibtex]
@InProceedings{Chharia_2025_CVPR,
  author    = {Chharia, Aviral and Gou, Wenbo and Dong, Haoye},
  title     = {{MV-SSM}: Multi-View State Space Modeling for {3D} Human Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11590--11599},
}
HyperGLM: HyperGraph for Video Scene Graph Generation and Anticipation: Trong-Thuan Nguyen,

Pha Nguyen,

Jackson Cothren,

Alper Yilmaz,

Khoa Luu; [pdf] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Trong-Thuan and Nguyen, Pha and Cothren, Jackson and Yilmaz, Alper and Luu, Khoa},
  title     = {{HyperGLM}: {HyperGraph} for Video Scene Graph Generation and Anticipation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29150--29160},
}
AnySat: One Earth Observation Model for Many Resolutions, Scales, and Modalities: Guillaume Astruc,

Nicolas Gonthier,

Clément Mallet,

Loic Landrieu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Astruc_2025_CVPR,
  author    = {Astruc, Guillaume and Gonthier, Nicolas and Mallet, Cl{\'e}ment and Landrieu, Loic},
  title     = {{AnySat}: One {Earth} Observation Model for Many Resolutions, Scales, and Modalities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19530--19540},
}
FSFM: A Generalizable Face Security Foundation Model via Self-Supervised Facial Representation Learning: Gaojian Wang,

Feng Lin,

Tong Wu,

Zhenguang Liu,

Zhongjie Ba,

Kui Ren; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wang_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Gaojian and Lin, Feng and Wu, Tong and Liu, Zhenguang and Ba, Zhongjie and Ren, Kui},
  title     = {{FSFM}: A Generalizable Face Security Foundation Model via Self-Supervised Facial Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24364--24376},
}
OVO-Bench: How Far is Your Video-LLMs from Real-World Online Video Understanding?: Junbo Niu,

Yifei Li,

Ziyang Miao,

Chunjiang Ge,

Yuanhang Zhou,

Qihao He,

Xiaoyi Dong,

Haodong Duan,

Shuangrui Ding,

Rui Qian,

Pan Zhang,

Yuhang Zang,

Yuhang Cao,

Conghui He,

Jiaqi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Niu_2025_CVPR,
  author    = {Niu, Junbo and Li, Yifei and Miao, Ziyang and Ge, Chunjiang and Zhou, Yuanhang and He, Qihao and Dong, Xiaoyi and Duan, Haodong and Ding, Shuangrui and Qian, Rui and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and He, Conghui and Wang, Jiaqi},
  title     = {{OVO-Bench}: How Far is Your {Video-LLMs} from Real-World Online Video Understanding?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18902--18913},
}
AlignMamba: Enhancing Multimodal Mamba with Local and Global Cross-modal Alignment: Yan Li,

Yifei Xing,

Xiangyuan Lan,

Xin Li,

Haifeng Chen,

Dongmei Jiang; [pdf] [arXiv]
[bibtex]
% NOTE(review): key Li_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yan and Xing, Yifei and Lan, Xiangyuan and Li, Xin and Chen, Haifeng and Jiang, Dongmei},
  title     = {{AlignMamba}: Enhancing Multimodal {Mamba} with Local and Global Cross-modal Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24774--24784},
}
Blurry-Edges: Photon-Limited Depth Estimation from Defocused Boundaries: Wei Xu,

Charles James Wagner,

Junjie Luo,

Qi Guo; [pdf] [supp]
[bibtex]
% NOTE(review): key Xu_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Wei and Wagner, Charles James and Luo, Junjie and Guo, Qi},
  title     = {{Blurry-Edges}: Photon-Limited Depth Estimation from Defocused Boundaries},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {432--441},
}
VideoComp: Advancing Fine-Grained Compositional and Temporal Alignment in Video-Text Models: Dahun Kim,

AJ Piergiovanni,

Ganesh Mallya,

Anelia Angelova; [pdf] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Dahun and Piergiovanni, AJ and Mallya, Ganesh and Angelova, Anelia},
  title     = {{VideoComp}: Advancing Fine-Grained Compositional and Temporal Alignment in Video-Text Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29060--29070},
}
One Model for ALL: Low-Level Task Interaction Is a Key to Task-Agnostic Image Fusion: Chunyang Cheng,

Tianyang Xu,

Zhenhua Feng,

Xiaojun Wu,

Zhangyong Tang,

Hui Li,

Zeyang Zhang,

Sara Atito,

Muhammad Awais,

Josef Kittler; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Cheng_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Chunyang and Xu, Tianyang and Feng, Zhenhua and Wu, Xiaojun and Tang, Zhangyong and Li, Hui and Zhang, Zeyang and Atito, Sara and Awais, Muhammad and Kittler, Josef},
  title     = {One Model for {ALL}: Low-Level Task Interaction Is a Key to Task-Agnostic Image Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28102--28112},
}
MICAS: Multi-grained In-Context Adaptive Sampling for 3D Point Cloud Processing: Feifei Shao,

Ping Liu,

Zhao Wang,

Yawei Luo,

Hongwei Wang,

Jun Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2025_CVPR,
  author    = {Shao, Feifei and Liu, Ping and Wang, Zhao and Luo, Yawei and Wang, Hongwei and Xiao, Jun},
  title     = {{MICAS}: Multi-grained In-Context Adaptive Sampling for {3D} Point Cloud Processing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6616--6626},
}
Can Text-to-Video Generation help Video-Language Alignment?: Luca Zanella,

Massimiliano Mancini,

Willi Menapace,

Sergey Tulyakov,

Yiming Wang,

Elisa Ricci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zanella_2025_CVPR,
  author    = {Zanella, Luca and Mancini, Massimiliano and Menapace, Willi and Tulyakov, Sergey and Wang, Yiming and Ricci, Elisa},
  title     = {Can Text-to-Video Generation help Video-Language Alignment?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24097--24107},
}
GoalFlow: Goal-Driven Flow Matching for Multimodal Trajectories Generation in End-to-End Autonomous Driving: Zebin Xing,

Xingyu Zhang,

Yang Hu,

Bo Jiang,

Tong He,

Qian Zhang,

Xiaoxiao Long,

Wei Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Zebin and Zhang, Xingyu and Hu, Yang and Jiang, Bo and He, Tong and Zhang, Qian and Long, Xiaoxiao and Yin, Wei},
  title     = {{GoalFlow}: Goal-Driven Flow Matching for Multimodal Trajectories Generation in End-to-End Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1602--1611},
}
GuardSplat: Efficient and Robust Watermarking for 3D Gaussian Splatting: Zixuan Chen,

Guangcong Wang,

Jiahao Zhu,

Jianhuang Lai,

Xiaohua Xie; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Chen_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zixuan and Wang, Guangcong and Zhu, Jiahao and Lai, Jianhuang and Xie, Xiaohua},
  title     = {{GuardSplat}: Efficient and Robust Watermarking for {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16325--16335},
}
Weakly Supervised Contrastive Adversarial Training for Learning Robust Features from Semi-supervised Data: Lilin Zhang,

Chengpei Wu,

Ning Yang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zhang_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Lilin and Wu, Chengpei and Yang, Ning},
  title     = {Weakly Supervised Contrastive Adversarial Training for Learning Robust Features from Semi-supervised Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25718--25727},
}
From Poses to Identity: Training-Free Person Re-Identification via Feature Centralization: Chao Yuan,

Guiwei Zhang,

Changxiao Ma,

Tianyi Zhang,

Guanglin Niu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Chao and Zhang, Guiwei and Ma, Changxiao and Zhang, Tianyi and Niu, Guanglin},
  title     = {From Poses to Identity: Training-Free Person Re-Identification via Feature Centralization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24409--24418},
}
ColabSfM: Collaborative Structure-from-Motion by Point Cloud Registration: Johan Edstedt,

André Mateus,

Alberto Jaenal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Edstedt_2025_CVPR,
  author    = {Edstedt, Johan and Mateus, Andr{\'e} and Jaenal, Alberto},
  title     = {{ColabSfM}: Collaborative Structure-from-Motion by Point Cloud Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6573--6583},
}
RoadSocial: A Diverse VideoQA Dataset and Benchmark for Road Event Understanding from Social Video Narratives: Chirag Parikh,

Deepti Rawat,

Rakshitha R. T.,

Tathagata Ghosh,

Ravi Kiran Sarvadevabhatla; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): "T., Rakshitha R." makes the initial "T." the family name of the author listed as "Rakshitha R. T."; confirm the intended name split.
@InProceedings{Parikh_2025_CVPR,
  author    = {Parikh, Chirag and Rawat, Deepti and T., Rakshitha R. and Ghosh, Tathagata and Sarvadevabhatla, Ravi Kiran},
  title     = {{RoadSocial}: A Diverse {VideoQA} Dataset and Benchmark for Road Event Understanding from Social Video Narratives},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19002--19011},
}
MangaNinja: Line Art Colorization with Precise Reference Following: Zhiheng Liu,

Ka Leong Cheng,

Xi Chen,

Jie Xiao,

Hao Ouyang,

Kai Zhu,

Yu Liu,

Yujun Shen,

Qifeng Chen,

Ping Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhiheng and Cheng, Ka Leong and Chen, Xi and Xiao, Jie and Ouyang, Hao and Zhu, Kai and Liu, Yu and Shen, Yujun and Chen, Qifeng and Luo, Ping},
  title     = {{MangaNinja}: Line Art Colorization with Precise Reference Following},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5666--5677},
}
Nonisotropic Gaussian Diffusion for Realistic 3D Human Motion Prediction: Cecilia Curreli,

Dominik Muhle,

Abhishek Saroha,

Zhenzhang Ye,

Riccardo Marin,

Daniel Cremers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Curreli_2025_CVPR,
  author    = {Curreli, Cecilia and Muhle, Dominik and Saroha, Abhishek and Ye, Zhenzhang and Marin, Riccardo and Cremers, Daniel},
  title     = {Nonisotropic {Gaussian} Diffusion for Realistic {3D} Human Motion Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1871--1882},
}
Is Your World Simulator a Good Story Presenter? A Consecutive Events-Based Benchmark for Future Long Video Generation: Yiping Wang,

Xuehai He,

Kuan Wang,

Luyao Ma,

Jianwei Yang,

Shuohang Wang,

Simon Shaolei Du,

Yelong Shen; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wang_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yiping and He, Xuehai and Wang, Kuan and Ma, Luyao and Yang, Jianwei and Wang, Shuohang and Du, Simon Shaolei and Shen, Yelong},
  title     = {Is Your World Simulator a Good Story Presenter? {A} Consecutive Events-Based Benchmark for Future Long Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13629--13638},
}
LookCloser: Frequency-aware Radiance Field for Tiny-Detail Scene: Xiaoyu Zhang,

Weihong Pan,

Chong Bao,

Xiyu Zhang,

Xiaojun Xiang,

Hanqing Jiang,

Hujun Bao; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zhang_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xiaoyu and Pan, Weihong and Bao, Chong and Zhang, Xiyu and Xiang, Xiaojun and Jiang, Hanqing and Bao, Hujun},
  title     = {{LookCloser}: Frequency-aware Radiance Field for Tiny-Detail Scene},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16122--16132},
}
PICO: Reconstructing 3D People In Contact with Objects: Alpár Cseke,

Shashank Tripathi,

Sai Kumar Dwivedi,

Arjun S. Lakshmipathy,

Agniv Chatterjee,

Michael J. Black,

Dimitrios Tzionas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cseke_2025_CVPR,
  author    = {Cseke, Alp{\'a}r and Tripathi, Shashank and Dwivedi, Sai Kumar and Lakshmipathy, Arjun S. and Chatterjee, Agniv and Black, Michael J. and Tzionas, Dimitrios},
  title     = {{PICO}: Reconstructing {3D} People In Contact with Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1783--1794},
}
Convex Relaxation for Robust Vanishing Point Estimation in Manhattan World: Bangyan Liao,

Zhenjun Zhao,

Haoang Li,

Yi Zhou,

Yingping Zeng,

Hao Li,

Peidong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_CVPR,
  author    = {Liao, Bangyan and Zhao, Zhenjun and Li, Haoang and Zhou, Yi and Zeng, Yingping and Li, Hao and Liu, Peidong},
  title     = {Convex Relaxation for Robust Vanishing Point Estimation in {Manhattan} World},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15823--15832},
}
Linguistics-aware Masked Image Modeling for Self-supervised Scene Text Recognition: Yifei Zhang,

Chang Liu,

Jin Wei,

Xiaomeng Yang,

Yu Zhou,

Can Ma,

Xiangyang Ji; [pdf] [arXiv]
[bibtex]
% NOTE(review): key Zhang_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yifei and Liu, Chang and Wei, Jin and Yang, Xiaomeng and Zhou, Yu and Ma, Can and Ji, Xiangyang},
  title     = {Linguistics-aware Masked Image Modeling for Self-supervised Scene Text Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9318--9328},
}
FruitNinja: 3D Object Interior Texture Generation with Gaussian Splatting: Fangyu Wu,

Yuhao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Fangyu and Chen, Yuhao},
  title     = {{FruitNinja}: {3D} Object Interior Texture Generation with {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11051--11060},
}
Scaling up Image Segmentation across Data and Tasks: Pei Wang,

Zhaowei Cai,

Hao Yang,

Ashwin Swaminathan,

R. Manmatha,

Stefano Soatto; [pdf] [supp]
[bibtex]
% NOTE(review): key Wang_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Pei and Cai, Zhaowei and Yang, Hao and Swaminathan, Ashwin and Manmatha, R. and Soatto, Stefano},
  title     = {Scaling up Image Segmentation across Data and Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4573--4583},
}
Take the Bull by the Horns: Learning to Segment Hard Samples: Yuan Guo,

Jingyu Kong,

Yu Wang,

Yuping Duan; [pdf] [supp]
[bibtex]
% NOTE(review): key Guo_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Yuan and Kong, Jingyu and Wang, Yu and Duan, Yuping},
  title     = {Take the Bull by the Horns: Learning to Segment Hard Samples},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15642--15652},
}
MIMO: A Medical Vision Language Model with Visual Referring Multimodal Input and Pixel Grounding Multimodal Output: Yanyuan Chen,

Dexuan Xu,

Yu Huang,

Songkun Zhan,

Hanpin Wang,

Dongxue Chen,

Xueping Wang,

Meikang Qiu,

Hang Li; [pdf] [supp]
[bibtex]
% NOTE(review): key Chen_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Yanyuan and Xu, Dexuan and Huang, Yu and Zhan, Songkun and Wang, Hanpin and Chen, Dongxue and Wang, Xueping and Qiu, Meikang and Li, Hang},
  title     = {{MIMO}: A Medical Vision Language Model with Visual Referring Multimodal Input and Pixel Grounding Multimodal Output},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24732--24741},
}
Bias for Action: Video Implicit Neural Representations with Bias Modulation: Alper Kayabasi,

Anil Kumar Vadathya,

Guha Balakrishnan,

Vishwanath Saragadam; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kayabasi_2025_CVPR,
  author    = {Kayabasi, Alper and Vadathya, Anil Kumar and Balakrishnan, Guha and Saragadam, Vishwanath},
  title     = {Bias for Action: Video Implicit Neural Representations with Bias Modulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27999--28008},
}
Bridging Past and Future: End-to-End Autonomous Driving with Historical Prediction and Planning: Bozhou Zhang,

Nan Song,

Xin Jin,

Li Zhang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zhang_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Bozhou and Song, Nan and Jin, Xin and Zhang, Li},
  title     = {Bridging Past and Future: End-to-End Autonomous Driving with Historical Prediction and Planning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6854--6863},
}
Blood Flow Speed Estimation with Optical Coherence Tomography Angiography Images: Wensheng Cheng,

Zhenghong Li,

Jiaxiang Ren,

Hyomin Jeong,

Congwu Du,

Yingtian Pan,

Haibin Ling; [pdf] [supp]
[bibtex]
% NOTE(review): key Cheng_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Wensheng and Li, Zhenghong and Ren, Jiaxiang and Jeong, Hyomin and Du, Congwu and Pan, Yingtian and Ling, Haibin},
  title     = {Blood Flow Speed Estimation with Optical Coherence Tomography Angiography Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10466--10475},
}
DreamTrack: Dreaming the Future for Multimodal Visual Object Tracking: Mingzhe Guo,

Weiping Tan,

Wenyu Ran,

Liping Jing,

Zhipeng Zhang; [pdf] [supp]
[bibtex]
% NOTE(review): key Guo_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Mingzhe and Tan, Weiping and Ran, Wenyu and Jing, Liping and Zhang, Zhipeng},
  title     = {{DreamTrack}: Dreaming the Future for Multimodal Visual Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7201--7210},
}
OmniStyle: Filtering High Quality Style Transfer Data at Scale: Ye Wang,

Ruiqi Liu,

Jiang Lin,

Fei Liu,

Zili Yi,

Yilin Wang,

Rui Ma; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wang_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Ye and Liu, Ruiqi and Lin, Jiang and Liu, Fei and Yi, Zili and Wang, Yilin and Ma, Rui},
  title     = {{OmniStyle}: Filtering High Quality Style Transfer Data at Scale},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7847--7856},
}
EIDT-V: Exploiting Intersections in Diffusion Trajectories for Model-Agnostic, Zero-Shot, Training-Free Text-to-Video Generation: Diljeet Jagpal,

Xi Chen,

Vinay P. Namboodiri; [pdf] [supp]
[bibtex]
@InProceedings{Jagpal_2025_CVPR,
  author    = {Jagpal, Diljeet and Chen, Xi and Namboodiri, Vinay P.},
  title     = {{EIDT-V}: Exploiting Intersections in Diffusion Trajectories for Model-Agnostic, Zero-Shot, Training-Free Text-to-Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18219--18228},
}
Cross-View Completion Models are Zero-shot Correspondence Estimators: Honggyu An,

Jin Hyeon Kim,

Seonghoon Park,

Jaewoo Jung,

Jisang Han,

Sunghwan Hong,

Seungryong Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2025_CVPR,
  author    = {An, Honggyu and Kim, Jin Hyeon and Park, Seonghoon and Jung, Jaewoo and Han, Jisang and Hong, Sunghwan and Kim, Seungryong},
  title     = {Cross-View Completion Models are Zero-shot Correspondence Estimators},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1103--1115},
}
Multi-party Collaborative Attention Control for Image Customization: Han Yang,

Chuanguang Yang,

Qiuli Wang,

Zhulin An,

Weilun Feng,

Libo Huang,

Yongjun Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Han and Yang, Chuanguang and Wang, Qiuli and An, Zhulin and Feng, Weilun and Huang, Libo and Xu, Yongjun},
  title     = {Multi-party Collaborative Attention Control for Image Customization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7942--7951},
}
Reproducible Vision-Language Models Meet Concepts Out of Pre-Training: Ziliang Chen,

Xin Huang,

Xiaoxuan Fan,

Keze Wang,

Yuyu Zhou,

Quanlong Guan,

Liang Lin; [pdf] [supp]
[bibtex]
% NOTE(review): key Chen_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Ziliang and Huang, Xin and Fan, Xiaoxuan and Wang, Keze and Zhou, Yuyu and Guan, Quanlong and Lin, Liang},
  title     = {Reproducible Vision-Language Models Meet Concepts Out of Pre-Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14701--14711},
}
Through-The-Mask: Mask-based Motion Trajectories for Image-to-Video Generation: Guy Yariv,

Yuval Kirstain,

Amit Zohar,

Shelly Sheynin,

Yaniv Taigman,

Yossi Adi,

Sagie Benaim,

Adam Polyak; [pdf] [supp]
[bibtex]
@InProceedings{Yariv_2025_CVPR,
  author    = {Yariv, Guy and Kirstain, Yuval and Zohar, Amit and Sheynin, Shelly and Taigman, Yaniv and Adi, Yossi and Benaim, Sagie and Polyak, Adam},
  title     = {{Through-The-Mask}: Mask-based Motion Trajectories for Image-to-Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18198--18208},
}
MAGE: Single Image to Material-Aware 3D via the Multi-View G-Buffer Estimation Model: Haoyuan Wang,

Zhenwei Wang,

Xiaoxiao Long,

Cheng Lin,

Gerhard Hancke,

Rynson W.H. Lau; [pdf] [supp]
[bibtex]
% NOTE(review): key Wang_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Haoyuan and Wang, Zhenwei and Long, Xiaoxiao and Lin, Cheng and Hancke, Gerhard and Lau, Rynson W. H.},
  title     = {{MAGE}: Single Image to Material-Aware {3D} via the Multi-View {G-Buffer} Estimation Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10985--10995},
}
Segment Anything, Even Occluded: Wei-En Tai,

Yu-Lin Shih,

Cheng Sun,

Yu-Chiang Frank Wang,

Hwann-Tzong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tai_2025_CVPR,
  author    = {Tai, Wei-En and Shih, Yu-Lin and Sun, Cheng and Wang, Yu-Chiang Frank and Chen, Hwann-Tzong},
  title     = {Segment Anything, Even Occluded},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29385--29394},
}
HOT3D: Hand and Object Tracking in 3D from Egocentric Multi-View Videos: Prithviraj Banerjee,

Sindi Shkodrani,

Pierre Moulon,

Shreyas Hampali,

Shangchen Han,

Fan Zhang,

Linguang Zhang,

Jade Fountain,

Edward Miller,

Selen Basol,

Richard Newcombe,

Robert Wang,

Jakob Julian Engel,

Tomas Hodan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Banerjee_2025_CVPR,
  author    = {Banerjee, Prithviraj and Shkodrani, Sindi and Moulon, Pierre and Hampali, Shreyas and Han, Shangchen and Zhang, Fan and Zhang, Linguang and Fountain, Jade and Miller, Edward and Basol, Selen and Newcombe, Richard and Wang, Robert and Engel, Jakob Julian and Hodan, Tomas},
  title     = {{HOT3D}: Hand and Object Tracking in {3D} from Egocentric Multi-View Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7061--7071},
}
DELT: A Simple Diversity-driven EarlyLate Training for Dataset Distillation: Zhiqiang Shen,

Ammar Sherif,

Zeyuan Yin,

Shitong Shao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Zhiqiang and Sherif, Ammar and Yin, Zeyuan and Shao, Shitong},
  title     = {{DELT}: A Simple Diversity-driven {EarlyLate} Training for Dataset Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4797--4806},
}
MESC-3D: Mining Effective Semantic Cues for 3D Reconstruction from a Single Image: Shaoming Li,

Qing Cai,

Songqi Kong,

Runqing Tan,

Heng Tong,

Shiji Qiu,

Yongguo Jiang,

Zhi Liu; [pdf] [supp]
[bibtex]
% NOTE(review): key Li_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Shaoming and Cai, Qing and Kong, Songqi and Tan, Runqing and Tong, Heng and Qiu, Shiji and Jiang, Yongguo and Liu, Zhi},
  title     = {{MESC-3D}: Mining Effective Semantic Cues for {3D} Reconstruction from a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16912--16921},
}
RoboBrain: A Unified Brain Model for Robotic Manipulation from Abstract to Concrete: Yuheng Ji,

Huajie Tan,

Jiayu Shi,

Xiaoshuai Hao,

Yuan Zhang,

Hengyuan Zhang,

Pengwei Wang,

Mengdi Zhao,

Yao Mu,

Pengju An,

Xinda Xue,

Qinghang Su,

Huaihai Lyu,

Xiaolong Zheng,

Jiaming Liu,

Zhongyuan Wang,

Shanghang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2025_CVPR,
  author    = {Ji, Yuheng and Tan, Huajie and Shi, Jiayu and Hao, Xiaoshuai and Zhang, Yuan and Zhang, Hengyuan and Wang, Pengwei and Zhao, Mengdi and Mu, Yao and An, Pengju and Xue, Xinda and Su, Qinghang and Lyu, Huaihai and Zheng, Xiaolong and Liu, Jiaming and Wang, Zhongyuan and Zhang, Shanghang},
  title     = {{RoboBrain}: A Unified Brain Model for Robotic Manipulation from Abstract to Concrete},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1724--1734},
}
Advancing Multiple Instance Learning with Continual Learning for Whole Slide Imaging: Xianrui Li,

Yufei Cui,

Jun Li,

Antoni B. Chan; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Li_2025_CVPR is shared with other entries in this file; disambiguate before citing.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xianrui and Cui, Yufei and Li, Jun and Chan, Antoni B.},
  title     = {Advancing Multiple Instance Learning with Continual Learning for Whole Slide Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20800--20809},
}
Beyond Image Classification: A Video Benchmark and Dual-Branch Hybrid Discrimination Framework for Compositional Zero-Shot Learning: Dongyao Jiang,

Haodong Jing,

Yongqiang Ma,

Nanning Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Dongyao and Jing, Haodong and Ma, Yongqiang and Zheng, Nanning},
  title     = {Beyond Image Classification: A Video Benchmark and Dual-Branch Hybrid Discrimination Framework for Compositional Zero-Shot Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9860--9869},
}
ABBSPO: Adaptive Bounding Box Scaling and Symmetric Prior based Orientation Prediction for Detecting Aerial Image Objects: Woojin Lee,

Hyugjae Chang,

Jaeho Moon,

Jaehyup Lee,

Munchurl Kim; [pdf] [supp]
[bibtex]
% NOTE(review): key Lee_2025_CVPR is shared with another entry earlier in this file; disambiguate before citing.
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Woojin and Chang, Hyugjae and Moon, Jaeho and Lee, Jaehyup and Kim, Munchurl},
  title     = {{ABBSPO}: Adaptive Bounding Box Scaling and Symmetric Prior based Orientation Prediction for Detecting Aerial Image Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8848--8858},
}
Decoupled Distillation to Erase: A General Unlearning Method for Any Class-centric Tasks: Yu Zhou,

Dian Zheng,

Qijie Mo,

Renjie Lu,

Kun-Yu Lin,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Yu and Zheng, Dian and Mo, Qijie and Lu, Renjie and Lin, Kun-Yu and Zheng, Wei-Shi},
  title     = {Decoupled Distillation to Erase: A General Unlearning Method for Any Class-centric Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20350--20359},
}
TAET: Two-Stage Adversarial Equalization Training on Long-Tailed Distributions: Wang Yu-Hang,

Junkang Guo,

Aolei Liu,

Kaihao Wang,

Zaitong Wu,

Zhenyu Liu,

Wenfei Yin,

Jian Liu; [pdf] [supp]
[bibtex]
% NOTE(review): the listing gives the first author as "Wang Yu-Hang"; if Wang is the family name, the author field should read "Wang, Yu-Hang" -- confirm against the paper.
@InProceedings{Yu-Hang_2025_CVPR,
  author    = {Yu-Hang, Wang and Guo, Junkang and Liu, Aolei and Wang, Kaihao and Wu, Zaitong and Liu, Zhenyu and Yin, Wenfei and Liu, Jian},
  title     = {{TAET}: Two-Stage Adversarial Equalization Training on Long-Tailed Distributions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15476--15485},
}
Few-shot Personalized Scanpath Prediction: Ruoyu Xue,

Jingyi Xu,

Sounak Mondal,

Hieu Le,

Greg Zelinsky,

Minh Hoai,

Dimitris Samaras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2025_CVPR,
  author    = {Xue, Ruoyu and Xu, Jingyi and Mondal, Sounak and Le, Hieu and Zelinsky, Greg and Hoai, Minh and Samaras, Dimitris},
  title     = {Few-shot Personalized Scanpath Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13497--13507},
}
Do Your Best and Get Enough Rest for Continual Learning: Hankyul Kang,

Gregor Seifer,

Donghyun Lee,

Jongbin Ryu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_CVPR,
  author    = {Kang, Hankyul and Seifer, Gregor and Lee, Donghyun and Ryu, Jongbin},
  title     = {Do Your Best and Get Enough Rest for Continual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10077--10086},
}
Enhancing Few-Shot Class-Incremental Learning via Training-Free Bi-Level Modality Calibration: Yiyang Chen,

Tianyu Ding,

Lei Wang,

Jing Huo,

Yang Gao,

Wenbin Li; [pdf] [supp]
[bibtex]
% NOTE(review): key Chen_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Yiyang and Ding, Tianyu and Wang, Lei and Huo, Jing and Gao, Yang and Li, Wenbin},
  title     = {Enhancing Few-Shot Class-Incremental Learning via Training-Free Bi-Level Modality Calibration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9881--9890},
}
MUSt3R: Multi-view Network for Stereo 3D Reconstruction: Yohann Cabon,

Lucas Stoffl,

Leonid Antsfeld,

Gabriela Csurka,

Boris Chidlovskii,

Jerome Revaud,

Vincent Leroy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cabon_2025_CVPR,
  author    = {Cabon, Yohann and Stoffl, Lucas and Antsfeld, Leonid and Csurka, Gabriela and Chidlovskii, Boris and Revaud, Jerome and Leroy, Vincent},
  title     = {{MUSt3R}: Multi-view Network for Stereo {3D} Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1050--1060},
}
Hybrid-Level Instruction Injection for Video Token Compression in Multi-modal Large Language Models: Zhihang Liu,

Chen-Wei Xie,

Pandeng Li,

Liming Zhao,

Longxiang Tang,

Yun Zheng,

Chuanbin Liu,

Hongtao Xie; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Liu_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhihang and Xie, Chen-Wei and Li, Pandeng and Zhao, Liming and Tang, Longxiang and Zheng, Yun and Liu, Chuanbin and Xie, Hongtao},
  title     = {Hybrid-Level Instruction Injection for Video Token Compression in Multi-modal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8568--8578},
}
Mamba4D: Efficient 4D Point Cloud Video Understanding with Disentangled Spatial-Temporal State Space Models: Jiuming Liu,

Jinru Han,

Lihao Liu,

Angelica I. Aviles-Rivero,

Chaokang Jiang,

Zhe Liu,

Hesheng Wang; [pdf] [supp]
[bibtex]
% NOTE(review): key Liu_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jiuming and Han, Jinru and Liu, Lihao and Aviles-Rivero, Angelica I. and Jiang, Chaokang and Liu, Zhe and Wang, Hesheng},
  title     = {{Mamba4D}: Efficient {4D} Point Cloud Video Understanding with Disentangled Spatial-Temporal State Space Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17626--17636},
}
Mamba as a Bridge: Where Vision Foundation Models Meet Vision Language Models for Domain-Generalized Semantic Segmentation: Xin Zhang,

Robby T. Tan; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zhang_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xin and Tan, Robby T.},
  title     = {Mamba as a Bridge: Where Vision Foundation Models Meet Vision Language Models for Domain-Generalized Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14527--14537},
}
Vision-Guided Action: Enhancing 3D Human Motion Prediction with Gaze-informed Affordance in 3D Scenes: Ting Yu,

Yi Lin,

Jun Yu,

Zhenyu Lou,

Qiongjie Cui; [pdf] [supp]
[bibtex]
% NOTE(review): key Yu_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Ting and Lin, Yi and Yu, Jun and Lou, Zhenyu and Cui, Qiongjie},
  title     = {Vision-Guided Action: Enhancing {3D} Human Motion Prediction with Gaze-informed Affordance in {3D} Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12335--12346},
}
LOGICZSL: Exploring Logic-induced Representation for Compositional Zero-shot Learning: Peng Wu,

Xiankai Lu,

Hao Hu,

Yongqin Xian,

Jianbing Shen,

Wenguan Wang; [pdf] [supp]
[bibtex]
% NOTE(review): key Wu_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Peng and Lu, Xiankai and Hu, Hao and Xian, Yongqin and Shen, Jianbing and Wang, Wenguan},
  title     = {{LOGICZSL}: Exploring Logic-induced Representation for Compositional Zero-shot Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30301--30311},
}
ChainHOI: Joint-based Kinematic Chain Modeling for Human-Object Interaction Generation: Ling-An Zeng,

Guohong Huang,

Yi-Lin Wei,

Shengbo Gu,

Yu-Ming Tang,

Jingke Meng,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_CVPR,
  author    = {Zeng, Ling-An and Huang, Guohong and Wei, Yi-Lin and Gu, Shengbo and Tang, Yu-Ming and Meng, Jingke and Zheng, Wei-Shi},
  title     = {{ChainHOI}: Joint-based Kinematic Chain Modeling for Human-Object Interaction Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12358--12369},
}
CLOC: Contrastive Learning for Ordinal Classification with Multi-Margin N-pair Loss: Dileepa Pitawela,

Gustavo Carneiro,

Hsiang-Ting Chen; [pdf] [supp]
[bibtex]
@InProceedings{Pitawela_2025_CVPR,
  author    = {Pitawela, Dileepa and Carneiro, Gustavo and Chen, Hsiang-Ting},
  title     = {{CLOC}: Contrastive Learning for Ordinal Classification with Multi-Margin {N-pair} Loss},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15538--15548},
}
Universal Actions for Enhanced Embodied Foundation Models: Jinliang Zheng,

Jianxiong Li,

Dongxiu Liu,

Yinan Zheng,

Zhihao Wang,

Zhonghong Ou,

Yu Liu,

Jingjing Liu,

Ya-Qin Zhang,

Xianyuan Zhan; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zheng_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Jinliang and Li, Jianxiong and Liu, Dongxiu and Zheng, Yinan and Wang, Zhihao and Ou, Zhonghong and Liu, Yu and Liu, Jingjing and Zhang, Ya-Qin and Zhan, Xianyuan},
  title     = {Universal Actions for Enhanced Embodied Foundation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22508--22519},
}
ObjectMover: Generative Object Movement with Video Prior: Xin Yu,

Tianyu Wang,

Soo Ye Kim,

Paul Guerrero,

Xi Chen,

Qing Liu,

Zhe Lin,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Yu_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Xin and Wang, Tianyu and Kim, Soo Ye and Guerrero, Paul and Chen, Xi and Liu, Qing and Lin, Zhe and Qi, Xiaojuan},
  title     = {{ObjectMover}: Generative Object Movement with Video Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17682--17691},
}
FaithDiff: Unleashing Diffusion Priors for Faithful Image Super-resolution: Junyang Chen,

Jinshan Pan,

Jiangxin Dong; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Chen_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Junyang and Pan, Jinshan and Dong, Jiangxin},
  title     = {{FaithDiff}: Unleashing Diffusion Priors for Faithful Image Super-resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28188--28197},
}
MLLM-as-a-Judge for Image Safety without Human Labeling: Zhenting Wang,

Shuming Hu,

Shiyu Zhao,

Xiaowen Lin,

Felix Juefei-Xu,

Zhuowei Li,

Ligong Han,

Harihar Subramanyam,

Li Chen,

Jianfa Chen,

Nan Jiang,

Lingjuan Lyu,

Shiqing Ma,

Dimitris N. Metaxas,

Ankit Jain; [pdf] [supp]
[bibtex]
% NOTE(review): key Wang_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhenting and Hu, Shuming and Zhao, Shiyu and Lin, Xiaowen and Juefei-Xu, Felix and Li, Zhuowei and Han, Ligong and Subramanyam, Harihar and Chen, Li and Chen, Jianfa and Jiang, Nan and Lyu, Lingjuan and Ma, Shiqing and Metaxas, Dimitris N. and Jain, Ankit},
  title     = {{MLLM-as-a-Judge} for Image Safety without Human Labeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14657--14666},
}
A New Statistical Model of Star Speckles for Learning to Detect and Characterize Exoplanets in Direct Imaging Observations: Théo Bodrito,

Olivier Flasseur,

Julien Mairal,

Jean Ponce,

Maud Langlois,

Anne-Marie Lagrange; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bodrito_2025_CVPR,
  author    = {Bodrito, Th{\'e}o and Flasseur, Olivier and Mairal, Julien and Ponce, Jean and Langlois, Maud and Lagrange, Anne-Marie},
  title     = {A New Statistical Model of Star Speckles for Learning to Detect and Characterize Exoplanets in Direct Imaging Observations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1230--1240},
}
Scene-agnostic Pose Regression for Visual Localization: Junwei Zheng,

Ruiping Liu,

Yufan Chen,

Zhenfang Chen,

Kailun Yang,

Jiaming Zhang,

Rainer Stiefelhagen; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zheng_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Junwei and Liu, Ruiping and Chen, Yufan and Chen, Zhenfang and Yang, Kailun and Zhang, Jiaming and Stiefelhagen, Rainer},
  title     = {Scene-agnostic Pose Regression for Visual Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27092--27102},
}
Learning to Filter Outlier Edges in Global SfM: Nicole Damblon,

Marc Pollefeys,

Daniel Barath; [pdf] [supp]
[bibtex]
@InProceedings{Damblon_2025_CVPR,
  author    = {Damblon, Nicole and Pollefeys, Marc and Barath, Daniel},
  title     = {Learning to Filter Outlier Edges in Global {SfM}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11558--11568},
}
Divide and Conquer: Heterogeneous Noise Integration for Diffusion-based Adversarial Purification: Gaozheng Pei,

Shaojie Lyu,

Gong Chen,

Ke Ma,

Qianqian Xu,

Yingfei Sun,

Qingming Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Pei_2025_CVPR,
  author    = {Pei, Gaozheng and Lyu, Shaojie and Chen, Gong and Ma, Ke and Xu, Qianqian and Sun, Yingfei and Huang, Qingming},
  title     = {Divide and Conquer: Heterogeneous Noise Integration for Diffusion-based Adversarial Purification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29268--29277},
}
SEC-Prompt:SEmantic Complementary Prompting for Few-Shot Class-Incremental Learning: Ye Liu,

Meng Yang; [pdf] [supp]
[bibtex]
% NOTE(review): key Liu_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Ye and Yang, Meng},
  title     = {{SEC-Prompt}:{SEmantic} Complementary Prompting for Few-Shot Class-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25643--25656},
}
LiMoE: Mixture of LiDAR Representation Learners from Automotive Scenes: Xiang Xu,

Lingdong Kong,

Hui Shuai,

Liang Pan,

Ziwei Liu,

Qingshan Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Xu_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Xiang and Kong, Lingdong and Shuai, Hui and Pan, Liang and Liu, Ziwei and Liu, Qingshan},
  title     = {{LiMoE}: Mixture of {LiDAR} Representation Learners from Automotive Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27368--27379},
}
CoT-VLA: Visual Chain-of-Thought Reasoning for Vision-Language-Action Models: Qingqing Zhao,

Yao Lu,

Moo Jin Kim,

Zipeng Fu,

Zhuoyang Zhang,

Yecheng Wu,

Zhaoshuo Li,

Qianli Ma,

Song Han,

Chelsea Finn,

Ankur Handa,

Tsung-Yi Lin,

Gordon Wetzstein,

Ming-Yu Liu,

Donglai Xiang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Qingqing and Lu, Yao and Kim, Moo Jin and Fu, Zipeng and Zhang, Zhuoyang and Wu, Yecheng and Li, Zhaoshuo and Ma, Qianli and Han, Song and Finn, Chelsea and Handa, Ankur and Lin, Tsung-Yi and Wetzstein, Gordon and Liu, Ming-Yu and Xiang, Donglai},
  title     = {{CoT-VLA}: Visual Chain-of-Thought Reasoning for Vision-Language-Action Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1702--1713},
}
WAVE: Weight Templates for Adaptive Initialization of Variable-sized Models: Fu Feng,

Yucheng Xie,

Jing Wang,

Xin Geng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Fu and Xie, Yucheng and Wang, Jing and Geng, Xin},
  title     = {{WAVE}: Weight Templates for Adaptive Initialization of Variable-sized Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4819--4828},
}
Forensics Adapter: Adapting CLIP for Generalizable Face Forgery Detection: Xinjie Cui,

Yuezun Li,

Ao Luo,

Jiaran Zhou,

Junyu Dong; [pdf]
[bibtex]
@InProceedings{Cui_2025_CVPR,
  author    = {Cui, Xinjie and Li, Yuezun and Luo, Ao and Zhou, Jiaran and Dong, Junyu},
  title     = {Forensics Adapter: Adapting {CLIP} for Generalizable Face Forgery Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19207--19217},
}
KAC: Kolmogorov-Arnold Classifier for Continual Learning: Yusong Hu,

Zichen Liang,

Fei Yang,

Qibin Hou,

Xialei Liu,

Ming-Ming Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Yusong and Liang, Zichen and Yang, Fei and Hou, Qibin and Liu, Xialei and Cheng, Ming-Ming},
  title     = {{KAC}: {Kolmogorov-Arnold} Classifier for Continual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15297--15307},
}
PI-HMR: Towards Robust In-bed Temporal Human Shape Reconstruction with Contact Pressure Sensing: Ziyu Wu,

Yufan Xiong,

Mengting Niu,

Fangting Xie,

Quan Wan,

Qijun Ying,

Boyan Liu,

Xiaohui Cai; [pdf] [supp]
[bibtex]
% NOTE(review): key Wu_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Ziyu and Xiong, Yufan and Niu, Mengting and Xie, Fangting and Wan, Quan and Ying, Qijun and Liu, Boyan and Cai, Xiaohui},
  title     = {{PI-HMR}: Towards Robust In-bed Temporal Human Shape Reconstruction with Contact Pressure Sensing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27739--27749},
}
BOOTPLACE: Bootstrapped Object Placement with Detection Transformers: Hang Zhou,

Xinxin Zuo,

Rui Ma,

Li Cheng; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zhou_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Hang and Zuo, Xinxin and Ma, Rui and Cheng, Li},
  title     = {{BOOTPLACE}: Bootstrapped Object Placement with Detection Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19294--19303},
}
CheckManual: A New Challenge and Benchmark for Manual-based Appliance Manipulation: Yuxing Long,

Jiyao Zhang,

Mingjie Pan,

Tianshu Wu,

Taewhan Kim,

Hao Dong; [pdf] [arXiv]
[bibtex]
@InProceedings{Long_2025_CVPR,
  author    = {Long, Yuxing and Zhang, Jiyao and Pan, Mingjie and Wu, Tianshu and Kim, Taewhan and Dong, Hao},
  title     = {{CheckManual}: A New Challenge and Benchmark for Manual-based Appliance Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22595--22604},
}
CXPMRG-Bench: Pre-training and Benchmarking for X-ray Medical Report Generation on CheXpert Plus Dataset: Xiao Wang,

Fuling Wang,

Yuehang Li,

Qingchuan Ma,

Shiao Wang,

Bo Jiang,

Jin Tang; [pdf] [supp]
[bibtex]
% NOTE(review): key Wang_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xiao and Wang, Fuling and Li, Yuehang and Ma, Qingchuan and Wang, Shiao and Jiang, Bo and Tang, Jin},
  title     = {{CXPMRG-Bench}: Pre-training and Benchmarking for {X-ray} Medical Report Generation on {CheXpert Plus} Dataset},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5123--5133},
}
FASTer: Focal token Acquiring-and-Scaling Transformer for Long-term 3D Objection Detection: Chenxu Dang,

ZaiPeng Duan,

Pei An,

Xinmin Zhang,

Xuzhong Hu,

Jie Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dang_2025_CVPR,
  author    = {Dang, Chenxu and Duan, ZaiPeng and An, Pei and Zhang, Xinmin and Hu, Xuzhong and Ma, Jie},
  title     = {{FASTer}: Focal token Acquiring-and-Scaling Transformer for Long-term {3D} Objection Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17029--17038},
}
SEEN-DA: SEmantic ENtropy guided Domain-aware Attention for Domain Adaptive Object Detection: Haochen Li,

Rui Zhang,

Hantao Yao,

Xin Zhang,

Yifan Hao,

Xinkai Song,

Shaohui Peng,

Yongwei Zhao,

Chen Zhao,

Yanjun Wu,

Ling Li; [pdf] [supp]
[bibtex]
% NOTE(review): key Li_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Haochen and Zhang, Rui and Yao, Hantao and Zhang, Xin and Hao, Yifan and Song, Xinkai and Peng, Shaohui and Zhao, Yongwei and Zhao, Chen and Wu, Yanjun and Li, Ling},
  title     = {{SEEN-DA}: {SEmantic} {ENtropy} guided Domain-aware Attention for Domain Adaptive Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25465--25475},
}
Event-Equalized Dense Video Captioning: Kangyi Wu,

Pengna Li,

Jingwen Fu,

Yizhe Li,

Yang Wu,

Yuhan Liu,

Jinjun Wang,

Sanping Zhou; [pdf] [supp]
[bibtex]
% NOTE(review): key Wu_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Kangyi and Li, Pengna and Fu, Jingwen and Li, Yizhe and Wu, Yang and Liu, Yuhan and Wang, Jinjun and Zhou, Sanping},
  title     = {Event-Equalized Dense Video Captioning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8417--8427},
}
Geometry-guided Online 3D Video Synthesis with Multi-View Temporal Consistency: Hyunho Ha,

Lei Xiao,

Christian Richardt,

Thu Nguyen-Phuoc,

Changil Kim,

Min H. Kim,

Douglas Lanman,

Numair Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ha_2025_CVPR,
  author    = {Ha, Hyunho and Xiao, Lei and Richardt, Christian and Nguyen-Phuoc, Thu and Kim, Changil and Kim, Min H. and Lanman, Douglas and Khan, Numair},
  title     = {Geometry-guided Online {3D} Video Synthesis with Multi-View Temporal Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11275--11285},
}
EDCFlow: Exploring Temporally Dense Difference Maps for Event-based Optical Flow Estimation: Daikun Liu,

Lei Cheng,

Teng Wang,

Changyin Sun; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Liu_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Daikun and Cheng, Lei and Wang, Teng and Sun, Changyin},
  title     = {{EDCFlow}: Exploring Temporally Dense Difference Maps for Event-based Optical Flow Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1984--1993},
}
Point2RBox-v2: Rethinking Point-supervised Oriented Object Detection with Spatial Layout Among Instances: Yi Yu,

Botao Ren,

Peiyuan Zhang,

Mingxin Liu,

Junwei Luo,

Shaofeng Zhang,

Feipeng Da,

Junchi Yan,

Xue Yang; [pdf]
[bibtex]
% NOTE(review): key Yu_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Yi and Ren, Botao and Zhang, Peiyuan and Liu, Mingxin and Luo, Junwei and Zhang, Shaofeng and Da, Feipeng and Yan, Junchi and Yang, Xue},
  title     = {{Point2RBox-v2}: Rethinking Point-supervised Oriented Object Detection with Spatial Layout Among Instances},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19283--19293},
}
LibraGrad: Balancing Gradient Flow for Universally Better Vision Transformer Attributions: Faridoun Mehri,

Mahdieh Soleymani Baghshah,

Mohammad Taher Pilehvar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mehri_2025_CVPR,
  author    = {Mehri, Faridoun and Baghshah, Mahdieh Soleymani and Pilehvar, Mohammad Taher},
  title     = {{LibraGrad}: Balancing Gradient Flow for Universally Better Vision Transformer Attributions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {67--78},
}
Blind Bitstream-corrupted Video Recovery via Metadata-guided Diffusion Model: Shuyun Wang,

Hu Zhang,

Xin Shen,

Dadong Wang,

Xin Yu; [pdf] [supp]
[bibtex]
% NOTE(review): key Wang_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shuyun and Zhang, Hu and Shen, Xin and Wang, Dadong and Yu, Xin},
  title     = {Blind Bitstream-corrupted Video Recovery via Metadata-guided Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22975--22984},
}
Mind the Trojan Horse: Image Prompt Adapter Enabling Scalable and Deceptive Jailbreaking: Junxi Chen,

Junhao Dong,

Xiaohua Xie; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Chen_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Junxi and Dong, Junhao and Xie, Xiaohua},
  title     = {Mind the {Trojan} Horse: Image Prompt Adapter Enabling Scalable and Deceptive Jailbreaking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23785--23794},
}
Lost in Translation, Found in Context: Sign Language Translation with Contextual Cues: Youngjoon Jang,

Haran Raajesh,

Liliane Momeni,

Gül Varol,

Andrew Zisserman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2025_CVPR,
  author    = {Jang, Youngjoon and Raajesh, Haran and Momeni, Liliane and Varol, G{\"u}l and Zisserman, Andrew},
  title     = {Lost in Translation, Found in Context: Sign Language Translation with Contextual Cues},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8742--8752},
}
CoCoGaussian: Leveraging Circle of Confusion for Gaussian Splatting from Defocused Images: Jungho Lee,

Suhwan Cho,

Taeoh Kim,

Ho-Deok Jang,

Minhyeok Lee,

Geonho Cha,

Dongyoon Wee,

Dogyoon Lee,

Sangyoun Lee; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Lee_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Jungho and Cho, Suhwan and Kim, Taeoh and Jang, Ho-Deok and Lee, Minhyeok and Cha, Geonho and Wee, Dongyoon and Lee, Dogyoon and Lee, Sangyoun},
  title     = {{CoCoGaussian}: Leveraging Circle of Confusion for {Gaussian} Splatting from Defocused Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16101--16110},
}
Semantic and Sequential Alignment for Referring Video Object Segmentation: Feiyu Pan,

Hao Fang,

Fangkai Li,

Yanyu Xu,

Yawei Li,

Luca Benini,

Xiankai Lu; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2025_CVPR,
  author    = {Pan, Feiyu and Fang, Hao and Li, Fangkai and Xu, Yanyu and Li, Yawei and Benini, Luca and Lu, Xiankai},
  title     = {Semantic and Sequential Alignment for Referring Video Object Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19067--19076},
}
Synchronized Video-to-Audio Generation via Mel Quantization-Continuum Decomposition: Juncheng Wang,

Chao Xu,

Cheng Yu,

Lei Shang,

Zhe Hu,

Shujun Wang,

Liefeng Bo; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wang_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Juncheng and Xu, Chao and Yu, Cheng and Shang, Lei and Hu, Zhe and Wang, Shujun and Bo, Liefeng},
  title     = {Synchronized Video-to-Audio Generation via {Mel} Quantization-Continuum Decomposition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3111--3120},
}
Continual SFT Matches Multimodal RLHF with Negative Supervision: Ke Zhu,

Yu Wang,

Yanpeng Sun,

Qiang Chen,

Jiangjiang Liu,

Gang Zhang,

Jingdong Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zhu_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Ke and Wang, Yu and Sun, Yanpeng and Chen, Qiang and Liu, Jiangjiang and Zhang, Gang and Wang, Jingdong},
  title     = {Continual {SFT} Matches Multimodal {RLHF} with Negative Supervision},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14615--14624},
}
Semantic-guided Cross-Modal Prompt Learning for Skeleton-based Zero-shot Action Recognition: Anqi Zhu,

Jingmin Zhu,

James Bailey,

Mingming Gong,

Qiuhong Ke; [pdf] [supp]
[bibtex]
% NOTE(review): key Zhu_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Anqi and Zhu, Jingmin and Bailey, James and Gong, Mingming and Ke, Qiuhong},
  title     = {Semantic-guided Cross-Modal Prompt Learning for Skeleton-based Zero-shot Action Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13876--13885},
}
FATE: Full-head Gaussian Avatar with Textural Editing from Monocular Video: Jiawei Zhang,

Zijian Wu,

Zhiyang Liang,

Yicheng Gong,

Dongfang Hu,

Yao Yao,

Xun Cao,

Hao Zhu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Zhang_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jiawei and Wu, Zijian and Liang, Zhiyang and Gong, Yicheng and Hu, Dongfang and Yao, Yao and Cao, Xun and Zhu, Hao},
  title     = {{FATE}: Full-head {Gaussian} Avatar with Textural Editing from Monocular Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5535--5545},
}
ChatGen: Automatic Text-to-Image Generation From FreeStyle Chatting: Chengyou Jia,

Changliang Xia,

Zhuohang Dang,

Weijia Wu,

Hangwei Qian,

Minnan Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2025_CVPR,
  author    = {Jia, Chengyou and Xia, Changliang and Dang, Zhuohang and Wu, Weijia and Qian, Hangwei and Luo, Minnan},
  title     = {{ChatGen}: Automatic Text-to-Image Generation From {FreeStyle} Chatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13284--13293},
}
GEM: A Generalizable Ego-Vision Multimodal World Model for Fine-Grained Ego-Motion, Object Dynamics, and Scene Composition Control: Mariam Hassan,

Sebastian Stapf,

Ahmad Rahimi,

Pedro M B Rezende,

Yasaman Haghighi,

David Brüggemann,

Isinsu Katircioglu,

Lin Zhang,

Xiaoran Chen,

Suman Saha,

Marco Cannici,

Elie Aljalbout,

Botao Ye,

Xi Wang,

Aram Davtyan,

Mathieu Salzmann,

Davide Scaramuzza,

Marc Pollefeys,

Paolo Favaro,

Alexandre Alahi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hassan_2025_CVPR,
  author    = {Hassan, Mariam and Stapf, Sebastian and Rahimi, Ahmad and Rezende, Pedro M B and Haghighi, Yasaman and Br{\"u}ggemann, David and Katircioglu, Isinsu and Zhang, Lin and Chen, Xiaoran and Saha, Suman and Cannici, Marco and Aljalbout, Elie and Ye, Botao and Wang, Xi and Davtyan, Aram and Salzmann, Mathieu and Scaramuzza, Davide and Pollefeys, Marc and Favaro, Paolo and Alahi, Alexandre},
  title     = {{GEM}: A Generalizable Ego-Vision Multimodal World Model for Fine-Grained Ego-Motion, Object Dynamics, and Scene Composition Control},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22404--22415},
}
VEU-Bench: Towards Comprehensive Understanding of Video Editing: Bozheng Li,

Yongliang Wu,

Yi Lu,

Jiashuo Yu,

Licheng Tang,

Jiawang Cao,

Wenqing Zhu,

Yuyang Sun,

Jay Wu,

Wenbo Zhu; [pdf] [supp]
[bibtex]
% NOTE(review): key Li_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Bozheng and Wu, Yongliang and Lu, Yi and Yu, Jiashuo and Tang, Licheng and Cao, Jiawang and Zhu, Wenqing and Sun, Yuyang and Wu, Jay and Zhu, Wenbo},
  title     = {{VEU-Bench}: Towards Comprehensive Understanding of Video Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13671--13680},
}
Decouple Distortion from Perception: Region Adaptive Diffusion for Extreme-low Bitrate Perception Image Compression: Jinchang Xu,

Shaokang Wang,

Jintao Chen,

Zhe Li,

Peidong Jia,

Fei Zhao,

Guoqing Xiang,

Zhijian Hao,

Shanghang Zhang,

Xiaodong Xie; [pdf] [supp]
[bibtex]
% NOTE(review): key Xu_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Jinchang and Wang, Shaokang and Chen, Jintao and Li, Zhe and Jia, Peidong and Zhao, Fei and Xiang, Guoqing and Hao, Zhijian and Zhang, Shanghang and Xie, Xiaodong},
  title     = {Decouple Distortion from Perception: Region Adaptive Diffusion for Extreme-low Bitrate Perception Image Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18051--18061},
}
Yo'Chameleon: Personalized Vision and Language Generation: Thao Nguyen,

Krishna Kumar Singh,

Jing Shi,

Trung Bui,

Yong Jae Lee,

Yuheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Thao and Singh, Krishna Kumar and Shi, Jing and Bui, Trung and Lee, Yong Jae and Li, Yuheng},
  title     = {{Yo'Chameleon}: Personalized Vision and Language Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14438--14448},
}
PatchVSR: Breaking Video Diffusion Resolution Limits with Patch-wise Video Super-Resolution: Shian Du,

Menghan Xia,

Chang Liu,

Xintao Wang,

Jing Wang,

Pengfei Wan,

Di Zhang,

Xiangyang Ji; [pdf] [supp]
[bibtex]
@InProceedings{Du_2025_CVPR,
  author    = {Du, Shian and Xia, Menghan and Liu, Chang and Wang, Xintao and Wang, Jing and Wan, Pengfei and Zhang, Di and Ji, Xiangyang},
  title     = {{PatchVSR}: Breaking Video Diffusion Resolution Limits with Patch-wise Video Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17799--17809},
}
FluxSpace: Disentangled Semantic Editing in Rectified Flow Models: Yusuf Dalva,

Kavana Venkatesh,

Pinar Yanardag; [pdf] [supp]
[bibtex]
@InProceedings{Dalva_2025_CVPR,
  author    = {Dalva, Yusuf and Venkatesh, Kavana and Yanardag, Pinar},
  title     = {{FluxSpace}: Disentangled Semantic Editing in Rectified Flow Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13083--13092},
}
Scene-Centric Unsupervised Panoptic Segmentation: Oliver Hahn,

Christoph Reich,

Nikita Araslanov,

Daniel Cremers,

Christian Rupprecht,

Stefan Roth; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hahn_2025_CVPR,
  author    = {Hahn, Oliver and Reich, Christoph and Araslanov, Nikita and Cremers, Daniel and Rupprecht, Christian and Roth, Stefan},
  title     = {Scene-Centric Unsupervised Panoptic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24485--24495},
}
Touch2Shape: Touch-Conditioned 3D Diffusion for Shape Exploration and Reconstruction: Yuanbo Wang,

Zhaoxuan Zhang,

Jiajin Qiu,

Dilong Sun,

Zhengyu Meng,

Xiaopeng Wei,

Xin Yang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wang_2025_CVPR is reused by other entries in this file; BibTeX flags repeated keys, so disambiguate before citing.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuanbo and Zhang, Zhaoxuan and Qiu, Jiajin and Sun, Dilong and Meng, Zhengyu and Wei, Xiaopeng and Yang, Xin},
  title     = {{Touch2Shape}: Touch-Conditioned {3D} Diffusion for Shape Exploration and Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5656--5665},
}
VITED: Video Temporal Evidence Distillation: Yujie Lu,

Yale Song,

William Wang,

Lorenzo Torresani,

Tushar Nagarajan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR, author = {Lu, Yujie and Song, Yale and Wang, William and Torresani, Lorenzo and Nagarajan, Tushar}, title = {{VITED}: Video Temporal Evidence Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8501--8511} }
Adversarial Domain Prompt Tuning and Generation for Single Domain Generalization: Zhipeng Xu,

De Cheng,

Xinyang Jiang,

Nannan Wang,

Dongsheng Li,

Xinbo Gao; [pdf]
[bibtex]
@InProceedings{Xu_2025_CVPR, author = {Xu, Zhipeng and Cheng, De and Jiang, Xinyang and Wang, Nannan and Li, Dongsheng and Gao, Xinbo}, title = {Adversarial Domain Prompt Tuning and Generation for Single Domain Generalization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18584--18595} }
Learning Physics From Video: Unsupervised Physical Parameter Estimation for Continuous Dynamical Systems: Alejandro Castañeda Garcia,

Jan Warchocki,

Jan van Gemert,

Daan Brinks,

Nergis Tomen; [pdf] [supp]
[bibtex]
@InProceedings{Garcia_2025_CVPR, author = {Garcia, Alejandro Casta{\~n}eda and Warchocki, Jan and van Gemert, Jan and Brinks, Daan and Tomen, Nergis}, title = {Learning Physics From Video: Unsupervised Physical Parameter Estimation for Continuous Dynamical Systems}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27924--27933} }
Temporal Score Analysis for Understanding and Correcting Diffusion Artifacts: Yu Cao,

Zengqun Zhao,

Ioannis Patras,

Shaogang Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_CVPR, author = {Cao, Yu and Zhao, Zengqun and Patras, Ioannis and Gong, Shaogang}, title = {Temporal Score Analysis for Understanding and Correcting Diffusion Artifacts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7707--7716} }
ProAPO: Progressively Automatic Prompt Optimization for Visual Classification: Xiangyan Qu,

Gaopeng Gou,

Jiamin Zhuang,

Jing Yu,

Kun Song,

Qihao Wang,

Yili Li,

Gang Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_CVPR, author = {Qu, Xiangyan and Gou, Gaopeng and Zhuang, Jiamin and Yu, Jing and Song, Kun and Wang, Qihao and Li, Yili and Xiong, Gang}, title = {{ProAPO}: Progressively Automatic Prompt Optimization for Visual Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25145--25155} }
ShapeWords: Guiding Text-to-Image Synthesis with 3D Shape-Aware Prompts: Dmitry Petrov,

Pradyumn Goyal,

Divyansh Shivashok,

Yuanming Tao,

Melinos Averkiou,

Evangelos Kalogerakis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Petrov_2025_CVPR, author = {Petrov, Dmitry and Goyal, Pradyumn and Shivashok, Divyansh and Tao, Yuanming and Averkiou, Melinos and Kalogerakis, Evangelos}, title = {{ShapeWords}: Guiding Text-to-Image Synthesis with {3D} Shape-Aware Prompts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13305--13314} }
Auto-Encoded Supervision for Perceptual Image Super-Resolution: MinKyu Lee,

Sangeek Hyun,

Woojin Jun,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR, author = {Lee, MinKyu and Hyun, Sangeek and Jun, Woojin and Heo, Jae-Pil}, title = {Auto-Encoded Supervision for Perceptual Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17958--17968} }
Black Swan: Abductive and Defeasible Video Reasoning in Unpredictable Events: Aditya Chinchure,

Sahithya Ravi,

Raymond Ng,

Vered Shwartz,

Boyang Li,

Leonid Sigal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chinchure_2025_CVPR, author = {Chinchure, Aditya and Ravi, Sahithya and Ng, Raymond and Shwartz, Vered and Li, Boyang and Sigal, Leonid}, title = {{Black Swan}: Abductive and Defeasible Video Reasoning in Unpredictable Events}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24201--24210} }
Go-with-the-Flow: Motion-Controllable Video Diffusion Models Using Real-Time Warped Noise: Ryan Burgert,

Yuancheng Xu,

Wenqi Xian,

Oliver Pilarski,

Pascal Clausen,

Mingming He,

Li Ma,

Yitong Deng,

Lingxiao Li,

Mohsen Mousavi,

Michael Ryoo,

Paul Debevec,

Ning Yu; [pdf] [supp]
[bibtex]
@InProceedings{Burgert_2025_CVPR, author = {Burgert, Ryan and Xu, Yuancheng and Xian, Wenqi and Pilarski, Oliver and Clausen, Pascal and He, Mingming and Ma, Li and Deng, Yitong and Li, Lingxiao and Mousavi, Mohsen and Ryoo, Michael and Debevec, Paul and Yu, Ning}, title = {{Go-with-the-Flow}: Motion-Controllable Video Diffusion Models Using Real-Time Warped Noise}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13--23} }
Silence is Golden: Leveraging Adversarial Examples to Nullify Audio Control in LDM-based Talking-Head Generation: Yuan Gan,

Jiaxu Miao,

Yunze Wang,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gan_2025_CVPR, author = {Gan, Yuan and Miao, Jiaxu and Wang, Yunze and Yang, Yi}, title = {Silence is Golden: Leveraging Adversarial Examples to Nullify Audio Control in {LDM}-based Talking-Head Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13434--13444} }
Iterative Predictor-Critic Code Decoding for Real-World Image Dehazing: Jiayi Fu,

Siyu Liu,

Zikun Liu,

Chun-Le Guo,

Hyunhee Park,

Ruiqi Wu,

Guoqing Wang,

Chongyi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_CVPR, author = {Fu, Jiayi and Liu, Siyu and Liu, Zikun and Guo, Chun-Le and Park, Hyunhee and Wu, Ruiqi and Wang, Guoqing and Li, Chongyi}, title = {Iterative Predictor-Critic Code Decoding for Real-World Image Dehazing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12700--12709} }
RNG: Relightable Neural Gaussians: Jiahui Fan,

Fujun Luan,

Jian Yang,

Milos Hasan,

Beibei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_CVPR, author = {Fan, Jiahui and Luan, Fujun and Yang, Jian and Hasan, Milos and Wang, Beibei}, title = {{RNG}: Relightable Neural {Gaussians}}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26525--26534} }
Towards Realistic Example-based Modeling via 3D Gaussian Stitching: Xinyu Gao,

Ziyi Yang,

Bingchen Gong,

Xiaoguang Han,

Sipeng Yang,

Xiaogang Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR, author = {Gao, Xinyu and Yang, Ziyi and Gong, Bingchen and Han, Xiaoguang and Yang, Sipeng and Jin, Xiaogang}, title = {Towards Realistic Example-based Modeling via {3D} {Gaussian} Stitching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26597--26607} }
Filter Images First, Generate Instructions Later: Pre-Instruction Data Selection for Visual Instruction Tuning: Bardia Safaei,

Faizan Siddiqui,

Jiacong Xu,

Vishal M. Patel,

Shao-Yuan Lo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Safaei_2025_CVPR, author = {Safaei, Bardia and Siddiqui, Faizan and Xu, Jiacong and Patel, Vishal M. and Lo, Shao-Yuan}, title = {Filter Images First, Generate Instructions Later: Pre-Instruction Data Selection for Visual Instruction Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14247--14256} }
Gradient-Guided Annealing for Domain Generalization: Aristotelis Ballas,

Christos Diou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ballas_2025_CVPR, author = {Ballas, Aristotelis and Diou, Christos}, title = {Gradient-Guided Annealing for Domain Generalization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20558--20568} }
Generative Sparse-View Gaussian Splatting: Hanyang Kong,

Xingyi Yang,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Kong_2025_CVPR, author = {Kong, Hanyang and Yang, Xingyi and Wang, Xinchao}, title = {Generative Sparse-View {Gaussian} Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26745--26755} }
MicroVQA: A Multimodal Reasoning Benchmark for Microscopy-Based Scientific Research: James Burgess,

Jeffrey J Nirschl,

Laura Bravo-Sánchez,

Alejandro Lozano,

Sanket Rajan Gupte,

Jesus G. Galaz-Montoya,

Yuhui Zhang,

Yuchang Su,

Disha Bhowmik,

Zachary Coman,

Sarina M Hasan,

Alexandra Johannesson,

William D. Leineweber,

Malvika G Nair,

Ridhi Yarlagadda,

Connor Zuraski,

Wah Chiu,

Sarah Cohen,

Jan N. Hansen,

Manuel D Leonetti,

Chad Liu,

Emma Lundberg,

Serena Yeung-Levy; [pdf] [supp]
[bibtex]
@InProceedings{Burgess_2025_CVPR, author = {Burgess, James and Nirschl, Jeffrey J and Bravo-S{\'a}nchez, Laura and Lozano, Alejandro and Gupte, Sanket Rajan and Galaz-Montoya, Jesus G. and Zhang, Yuhui and Su, Yuchang and Bhowmik, Disha and Coman, Zachary and Hasan, Sarina M and Johannesson, Alexandra and Leineweber, William D. and Nair, Malvika G and Yarlagadda, Ridhi and Zuraski, Connor and Chiu, Wah and Cohen, Sarah and Hansen, Jan N. and Leonetti, Manuel D and Liu, Chad and Lundberg, Emma and Yeung-Levy, Serena}, title = {{MicroVQA}: A Multimodal Reasoning Benchmark for Microscopy-Based Scientific Research}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19552--19564} }
Generative Inbetweening through Frame-wise Conditions-Driven Video Generation: Tianyi Zhu,

Dongwei Ren,

Qilong Wang,

Xiaohe Wu,

Wangmeng Zuo; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR, author = {Zhu, Tianyi and Ren, Dongwei and Wang, Qilong and Wu, Xiaohe and Zuo, Wangmeng}, title = {Generative Inbetweening through Frame-wise Conditions-Driven Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27968--27978} }
DexGrasp Anything: Towards Universal Robotic Dexterous Grasping with Physics Awareness: Yiming Zhong,

Qi Jiang,

Jingyi Yu,

Yuexin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_CVPR, author = {Zhong, Yiming and Jiang, Qi and Yu, Jingyi and Ma, Yuexin}, title = {{DexGrasp Anything}: Towards Universal Robotic Dexterous Grasping with Physics Awareness}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22584--22594} }
CustAny: Customizing Anything from A Single Example: Lingjie Kong,

Kai Wu,

Chengming Xu,

Xiaobin Hu,

Wenhui Han,

Jinlong Peng,

Donghao Luo,

Mengtian Li,

Jiangning Zhang,

Chengjie Wang,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2025_CVPR, author = {Kong, Lingjie and Wu, Kai and Xu, Chengming and Hu, Xiaobin and Han, Wenhui and Peng, Jinlong and Luo, Donghao and Li, Mengtian and Zhang, Jiangning and Wang, Chengjie and Fu, Yanwei}, title = {{CustAny}: Customizing Anything from A Single Example}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20916--20925} }
Vision-Language Gradient Descent-driven All-in-One Deep Unfolding Networks: Haijin Zeng,

Xiangming Wang,

Yongyong Chen,

Jingyong Su,

Jie Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_CVPR, author = {Zeng, Haijin and Wang, Xiangming and Chen, Yongyong and Su, Jingyong and Liu, Jie}, title = {Vision-Language Gradient Descent-driven All-in-One Deep Unfolding Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7524--7533} }
3D-LLaVA: Towards Generalist 3D LMMs with Omni Superpoint Transformer: Jiajun Deng,

Tianyu He,

Li Jiang,

Tianyu Wang,

Feras Dayoub,

Ian Reid; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_CVPR, author = {Deng, Jiajun and He, Tianyu and Jiang, Li and Wang, Tianyu and Dayoub, Feras and Reid, Ian}, title = {{3D-LLaVA}: Towards Generalist {3D} {LMMs} with Omni Superpoint Transformer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {3772--3782} }
Event-based Video Super-Resolution via State Space Models: Zeyu Xiao,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2025_CVPR, author = {Xiao, Zeyu and Wang, Xinchao}, title = {Event-based Video Super-Resolution via State Space Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12564--12574} }
PoseTraj: Pose-Aware Trajectory Control in Video Diffusion: Longbin Ji,

Lei Zhong,

Pengfei Wei,

Changjian Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2025_CVPR, author = {Ji, Longbin and Zhong, Lei and Wei, Pengfei and Li, Changjian}, title = {{PoseTraj}: Pose-Aware Trajectory Control in Video Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22776--22785} }
Masked Scene Modeling: Narrowing the Gap Between Supervised and Self-Supervised Learning in 3D Scene Understanding: Pedro Hermosilla,

Christian Stippel,

Leon Sick; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hermosilla_2025_CVPR, author = {Hermosilla, Pedro and Stippel, Christian and Sick, Leon}, title = {Masked Scene Modeling: Narrowing the Gap Between Supervised and Self-Supervised Learning in {3D} Scene Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14835--14844} }
VL2Lite: Task-Specific Knowledge Distillation from Large Vision-Language Models to Lightweight Networks: Jinseong Jang,

Chunfei Ma,

Byeongwon Lee; [pdf]
[bibtex]
@InProceedings{Jang_2025_CVPR, author = {Jang, Jinseong and Ma, Chunfei and Lee, Byeongwon}, title = {{VL2Lite}: Task-Specific Knowledge Distillation from Large Vision-Language Models to Lightweight Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30073--30083} }
Boost the Inference with Co-training: A Depth-guided Mutual Learning Framework for Semi-supervised Medical Polyp Segmentation: Yuxin Li,

Zihao Zhu,

Yuxiang Zhang,

Yifan Chen,

Zhibin Yu; [pdf]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Yuxin and Zhu, Zihao and Zhang, Yuxiang and Chen, Yifan and Yu, Zhibin}, title = {Boost the Inference with Co-training: A Depth-guided Mutual Learning Framework for Semi-supervised Medical Polyp Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10394--10403} }
VidHalluc: Evaluating Temporal Hallucinations in Multimodal Large Language Models for Video Understanding: Chaoyu Li,

Eun Woo Im,

Pooyan Fazli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Chaoyu and Im, Eun Woo and Fazli, Pooyan}, title = {{VidHalluc}: Evaluating Temporal Hallucinations in Multimodal Large Language Models for Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13723--13733} }
StageDesigner: Artistic Stage Generation for Scenography via Theater Scripts: Zhaoxing Gan,

Mengtian Li,

Ruhua Chen,

Zhongxia Ji,

Sichen Guo,

Huanling Hu,

Guangnan Ye,

Zuo Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gan_2025_CVPR, author = {Gan, Zhaoxing and Li, Mengtian and Chen, Ruhua and Ji, Zhongxia and Guo, Sichen and Hu, Huanling and Ye, Guangnan and Hu, Zuo}, title = {{StageDesigner}: Artistic Stage Generation for Scenography via Theater Scripts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28705--28714} }
From Laboratory to Real World: A New Benchmark Towards Privacy-Preserved Visible-Infrared Person Re-Identification: Yan Jiang,

Hao Yu,

Xu Cheng,

Haoyu Chen,

Zhaodong Sun,

Guoying Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR, author = {Jiang, Yan and Yu, Hao and Cheng, Xu and Chen, Haoyu and Sun, Zhaodong and Zhao, Guoying}, title = {From Laboratory to Real World: A New Benchmark Towards Privacy-Preserved Visible-Infrared Person Re-Identification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8828--8837} }
4Deform: Neural Surface Deformation for Robust Shape Interpolation: Lu Sang,

Zehranaz Canfes,

Dongliang Cao,

Riccardo Marin,

Florian Bernard,

Daniel Cremers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sang_2025_CVPR, author = {Sang, Lu and Canfes, Zehranaz and Cao, Dongliang and Marin, Riccardo and Bernard, Florian and Cremers, Daniel}, title = {{4Deform}: Neural Surface Deformation for Robust Shape Interpolation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6542--6551} }
Dense Match Summarization for Faster Two-view Estimation: Jonathan Astermark,

Anders Heyden,

Viktor Larsson; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Astermark_2025_CVPR, author = {Astermark, Jonathan and Heyden, Anders and Larsson, Viktor}, title = {Dense Match Summarization for Faster Two-view Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1093--1102} }
Align-A-Video: Deterministic Reward Tuning of Image Diffusion Models for Consistent Video Editing: Shengzhi Wang,

Yingkang Zhong,

Jiangchuan Mu,

Kai Wu,

Mingliang Xiong,

Wen Fang,

Mingqing Liu,

Hao Deng,

Bin He,

Gang Li,

Qingwen Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Shengzhi and Zhong, Yingkang and Mu, Jiangchuan and Wu, Kai and Xiong, Mingliang and Fang, Wen and Liu, Mingqing and Deng, Hao and He, Bin and Li, Gang and Liu, Qingwen}, title = {{Align-A-Video}: Deterministic Reward Tuning of Image Diffusion Models for Consistent Video Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2074--2083} }
Interpreting Object-level Foundation Models via Visual Precision Search: Ruoyu Chen,

Siyuan Liang,

Jingzhi Li,

Shiming Liu,

Maosen Li,

Zhen Huang,

Hua Zhang,

Xiaochun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR, author = {Chen, Ruoyu and Liang, Siyuan and Li, Jingzhi and Liu, Shiming and Li, Maosen and Huang, Zhen and Zhang, Hua and Cao, Xiaochun}, title = {Interpreting Object-level Foundation Models via Visual Precision Search}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30042--30052} }
Foley-Flow: Coordinated Video-to-Audio Generation with Masked Audio-Visual Alignment and Dynamic Conditional Flows: Shentong Mo,

Yibing Song; [pdf]
[bibtex]
@InProceedings{Mo_2025_CVPR, author = {Mo, Shentong and Song, Yibing}, title = {{Foley-Flow}: Coordinated Video-to-Audio Generation with Masked Audio-Visual Alignment and Dynamic Conditional Flows}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28912--28921} }
LION-FS: Fast & Slow Video-Language Thinker as Online Video Assistant: Wei Li,

Bing Hu,

Rui Shao,

Leyang Shen,

Liqiang Nie; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Wei and Hu, Bing and Shao, Rui and Shen, Leyang and Nie, Liqiang}, title = {{LION-FS}: Fast \& Slow Video-Language Thinker as Online Video Assistant}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {3240--3251} }
CARE Transformer: Mobile-Friendly Linear Visual Transformer via Decoupled Dual Interaction: Yuan Zhou,

Qingshan Xu,

Jiequan Cui,

Junbao Zhou,

Jing Zhang,

Richang Hong,

Hanwang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR, author = {Zhou, Yuan and Xu, Qingshan and Cui, Jiequan and Zhou, Junbao and Zhang, Jing and Hong, Richang and Zhang, Hanwang}, title = {{CARE} Transformer: Mobile-Friendly Linear Visual Transformer via Decoupled Dual Interaction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20135--20145} }
Paint by Inpaint: Learning to Add Image Objects by Removing Them First: Navve Wasserman,

Noam Rotstein,

Roy Ganz,

Ron Kimmel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wasserman_2025_CVPR, author = {Wasserman, Navve and Rotstein, Noam and Ganz, Roy and Kimmel, Ron}, title = {Paint by Inpaint: Learning to Add Image Objects by Removing Them First}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18313--18324} }
Motion-Grounded Video Reasoning: Understanding and Perceiving Motion at Pixel Level: Andong Deng,

Tongjia Chen,

Shoubin Yu,

Taojiannan Yang,

Lincoln Spencer,

Yapeng Tian,

Ajmal Saeed Mian,

Mohit Bansal,

Chen Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2025_CVPR, author = {Deng, Andong and Chen, Tongjia and Yu, Shoubin and Yang, Taojiannan and Spencer, Lincoln and Tian, Yapeng and Mian, Ajmal Saeed and Bansal, Mohit and Chen, Chen}, title = {Motion-Grounded Video Reasoning: Understanding and Perceiving Motion at Pixel Level}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8625--8636} }
PMA: Towards Parameter-Efficient Point Cloud Understanding via Point Mamba Adapter: Yaohua Zha,

Yanzi Wang,

Hang Guo,

Jinpeng Wang,

Tao Dai,

Bin Chen,

Zhihao Ouyang,

Xue Yuerong,

Ke Chen,

Shu-Tao Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zha_2025_CVPR, author = {Zha, Yaohua and Wang, Yanzi and Guo, Hang and Wang, Jinpeng and Dai, Tao and Chen, Bin and Ouyang, Zhihao and Yuerong, Xue and Chen, Ke and Xia, Shu-Tao}, title = {{PMA}: Towards Parameter-Efficient Point Cloud Understanding via Point {Mamba} Adapter}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16976--16986} }
All-directional Disparity Estimation for Real-world QPD Images: Hongtao Yu,

Shaohui Song,

Lihu Sun,

Wenkai Su,

Xiaodong Yang,

Chengming Liu; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR, author = {Yu, Hongtao and Song, Shaohui and Sun, Lihu and Su, Wenkai and Yang, Xiaodong and Liu, Chengming}, title = {All-directional Disparity Estimation for Real-world {QPD} Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21836--21846} }
LC-Mamba: Local and Continuous Mamba with Shifted Windows for Frame Interpolation: Min Wu Jeong,

Chae Eun Rhee; [pdf] [supp]
[bibtex]
@InProceedings{Jeong_2025_CVPR, author = {Jeong, Min Wu and Rhee, Chae Eun}, title = {{LC-Mamba}: Local and Continuous {Mamba} with Shifted Windows for Frame Interpolation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17671--17681} }
Zero-Shot Head Swapping in Real-World Scenarios: Taewoong Kang,

Sohyun Jeong,

Hyojin Jang,

Jaegul Choo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_CVPR, author = {Kang, Taewoong and Jeong, Sohyun and Jang, Hyojin and Choo, Jaegul}, title = {Zero-Shot Head Swapping in Real-World Scenarios}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10805--10814} }
Toward Robust Neural Reconstruction from Sparse Point Sets: Amine Ouasfi,

Shubhendu Jena,

Eric Marchand,

Adnane Boukhayma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ouasfi_2025_CVPR, author = {Ouasfi, Amine and Jena, Shubhendu and Marchand, Eric and Boukhayma, Adnane}, title = {Toward Robust Neural Reconstruction from Sparse Point Sets}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6552--6562} }
GPAvatar: High-fidelity Head Avatars by Learning Efficient Gaussian Projections: Wei-Qi Feng,

Dong Han,

Ze-Kang Zhou,

Shunkai Li,

Xiaoqiang Liu,

Pengfei Wan,

Di Zhang,

Miao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2025_CVPR, author = {Feng, Wei-Qi and Han, Dong and Zhou, Ze-Kang and Li, Shunkai and Liu, Xiaoqiang and Wan, Pengfei and Zhang, Di and Wang, Miao}, title = {{GPAvatar}: High-fidelity Head Avatars by Learning Efficient {Gaussian} Projections}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {250--259} }
PIAD: Pose and Illumination agnostic Anomaly Detection: Kaichen Yang,

Junjie Cao,

Zeyu Bai,

Zhixun Su,

Andrea Tagliasacchi; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR, author = {Yang, Kaichen and Cao, Junjie and Bai, Zeyu and Su, Zhixun and Tagliasacchi, Andrea}, title = {{PIAD}: Pose and Illumination agnostic Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4734--4743} }
CAV-MAE Sync: Improving Contrastive Audio-Visual Mask Autoencoders via Fine-Grained Alignment: Edson Araujo,

Andrew Rouditchenko,

Yuan Gong,

Saurabhchand Bhati,

Samuel Thomas,

Brian Kingsbury,

Leonid Karlinsky,

Rogerio Feris,

James R. Glass,

Hilde Kuehne; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Araujo_2025_CVPR, author = {Araujo, Edson and Rouditchenko, Andrew and Gong, Yuan and Bhati, Saurabhchand and Thomas, Samuel and Kingsbury, Brian and Karlinsky, Leonid and Feris, Rogerio and Glass, James R. and Kuehne, Hilde}, title = {{CAV-MAE Sync}: Improving Contrastive Audio-Visual Mask Autoencoders via Fine-Grained Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18794--18803} }
Two is Better than One: Efficient Ensemble Defense for Robust and Compact Models: Yoojin Jung,

Byung Cheol Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2025_CVPR, author = {Jung, Yoojin and Song, Byung Cheol}, title = {Two is Better than One: Efficient Ensemble Defense for Robust and Compact Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9696--9706} }
Tiled Diffusion: Or Madar,

Ohad Fried; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Madar_2025_CVPR, author = {Madar, Or and Fried, Ohad}, title = {Tiled Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7795--7804} }
Using Diffusion Priors for Video Amodal Segmentation: Kaihua Chen,

Deva Ramanan,

Tarasha Khurana; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR, author = {Chen, Kaihua and Ramanan, Deva and Khurana, Tarasha}, title = {Using Diffusion Priors for Video Amodal Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22890--22900} }
COBRA: COmBinatorial Retrieval Augmentation for Few-Shot Adaptation: Arnav M. Das,

Gantavya Bhatt,

Lilly Kumari,

Sahil Verma,

Jeff Bilmes; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Das_2025_CVPR, author = {Das, Arnav M. and Bhatt, Gantavya and Kumari, Lilly and Verma, Sahil and Bilmes, Jeff}, title = {{COBRA}: {COmBinatorial} Retrieval Augmentation for Few-Shot Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20534--20546} }
Dyn-HaMR: Recovering 4D Interacting Hand Motion from a Dynamic Camera: Zhengdi Yu,

Stefanos Zafeiriou,

Tolga Birdal; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR, author = {Yu, Zhengdi and Zafeiriou, Stefanos and Birdal, Tolga}, title = {{Dyn-HaMR}: Recovering {4D} Interacting Hand Motion from a Dynamic Camera}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27716--27726} }
The Scene Language: Representing Scenes with Programs, Words, and Embeddings: Yunzhi Zhang,

Zizhang Li,

Matt Zhou,

Shangzhe Wu,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yunzhi and Li, Zizhang and Zhou, Matt and Wu, Shangzhe and Wu, Jiajun}, title = {The Scene Language: Representing Scenes with Programs, Words, and Embeddings}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24625--24634} }
ProbeSDF: Light Field Probes For Neural Surface Reconstruction: Briac Toussaint,

Diego Thomas,

Jean-Sébastien Franco; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Toussaint_2025_CVPR, author = {Toussaint, Briac and Thomas, Diego and Franco, Jean-S{\'e}bastien}, title = {{ProbeSDF}: Light Field Probes For Neural Surface Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11026--11035} }
Descriptor-In-Pixel : Point-Feature Tracking For Pixel Processor Arrays: Laurie Bose,

Jianing Chen,

Piotr Dudek; [pdf] [supp]
[bibtex]
@InProceedings{Bose_2025_CVPR, author = {Bose, Laurie and Chen, Jianing and Dudek, Piotr}, title = {{Descriptor-In-Pixel} : Point-Feature Tracking For Pixel Processor Arrays}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5392--5400} }
Hybrid Concept Bottleneck Models: Yang Liu,

Tianwei Zhang,

Shi Gu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Yang and Zhang, Tianwei and Gu, Shi}, title = {Hybrid Concept Bottleneck Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20179--20189} }
UVGS: Reimagining Unstructured 3D Gaussian Splatting using UV Mapping: Aashish Rai,

Dilin Wang,

Mihir Jain,

Nikolaos Sarafianos,

Kefan Chen,

Srinath Sridhar,

Aayush Prakash; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rai_2025_CVPR, author = {Rai, Aashish and Wang, Dilin and Jain, Mihir and Sarafianos, Nikolaos and Chen, Kefan and Sridhar, Srinath and Prakash, Aayush}, title = {{UVGS}: Reimagining Unstructured {3D} {Gaussian} Splatting using {UV} Mapping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5927--5937} }
Dual Consolidation for Pre-Trained Model-Based Domain-Incremental Learning: Da-Wei Zhou,

Zi-Wen Cai,

Han-Jia Ye,

Lijun Zhang,

De-Chuan Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Da-Wei and Cai, Zi-Wen and Ye, Han-Jia and Zhang, Lijun and Zhan, De-Chuan},
  title     = {Dual Consolidation for Pre-Trained Model-Based Domain-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20547--20557},
}
Learning Physics-Based Full-Body Human Reaching and Grasping from Brief Walking References: Yitang Li,

Mingxian Lin,

Zhuo Lin,

Yipeng Deng,

Yue Cao,

Li Yi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yitang and Lin, Mingxian and Lin, Zhuo and Deng, Yipeng and Cao, Yue and Yi, Li},
  title     = {Learning Physics-Based Full-Body Human Reaching and Grasping from Brief Walking References},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27673--27682},
}
EmoEdit: Evoking Emotions through Image Manipulation: Jingyuan Yang,

Jiawei Feng,

Weibin Luo,

Dani Lischinski,

Daniel Cohen-Or,

Hui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jingyuan and Feng, Jiawei and Luo, Weibin and Lischinski, Dani and Cohen-Or, Daniel and Huang, Hui},
  title     = {{EmoEdit}: Evoking Emotions through Image Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24690--24699},
}
RORem: Training a Robust Object Remover with Human-in-the-Loop: Ruibin Li,

Tao Yang,

Song Guo,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Ruibin and Yang, Tao and Guo, Song and Zhang, Lei},
  title     = {{RORem}: Training a Robust Object Remover with Human-in-the-Loop},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14024--14035},
}
All Languages Matter: Evaluating LMMs on Culturally Diverse 100 Languages: Ashmal Vayani,

Dinura Dissanayake,

Hasindri Watawana,

Noor Ahsan,

Nevasini Sasikumar,

Omkar Thawakar,

Henok Biadglign Ademtew,

Yahya Hmaiti,

Amandeep Kumar,

Kartik Kukreja,

Mykola Maslych,

Wafa Al Ghallabi,

Mihail Minkov Mihaylov,

Chao Qin,

Abdelrahman M. Shaker,

Mike Zhang,

Mahardika Krisna Ihsani,

Amiel Gian Esplana,

Monil Gokani,

Shachar Mirkin,

Harsh Singh,

Ashay Srivastava,

Endre Hamerlik,

Fathinah Asma Izzati,

Fadillah Adamsyah Maani,

Sebastian Cavada,

Jenny Chim,

Rohit Gupta,

Sanjay Manjunath,

Kamila Zhumakhanova,

Feno Heriniaina Rabevohitra,

Azril Hafizi Amirudin,

Muhammad Ridzuan,

Daniya Najiha Abdul Kareem,

Ketan Pravin More,

Kunyang Li,

Pramesh Shakya,

Muhammad Saad,

Amirpouya Ghasemaghaei,

Amirbek Djanibekov,

Dilshod Azizov,

Branislava Jankovic,

Naman Bhatia,

Alvaro Cabrera,

Johan Obando-Ceron,

Olympiah Otieno,

Febian Farestam,

Muztoba Rabbani,

Sanoojan Ballah,

Santosh Sanjeev,

Abduragim Shtanchaev,

Maheen Fatima,

Thao Nguyen,

Amrin Kareem,

Toluwani Aremu,

Nathan Augusto Zacarias Xavier,

Amit Bhatkal,

Hawau Olamide Toyin,

Aman Chadha,

Hisham Cholakkal,

Rao Muhammad Anwer,

Michael Felsberg,

Jorma Laaksonen,

Thamar Solorio,

Monojit Choudhury,

Ivan Laptev,

Mubarak Shah,

Salman Khan,

Fahad Shahbaz Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vayani_2025_CVPR,
  author    = {Vayani, Ashmal and Dissanayake, Dinura and Watawana, Hasindri and Ahsan, Noor and Sasikumar, Nevasini and Thawakar, Omkar and Ademtew, Henok Biadglign and Hmaiti, Yahya and Kumar, Amandeep and Kukreja, Kartik and Maslych, Mykola and Al Ghallabi, Wafa and Mihaylov, Mihail Minkov and Qin, Chao and Shaker, Abdelrahman M. and Zhang, Mike and Ihsani, Mahardika Krisna and Esplana, Amiel Gian and Gokani, Monil and Mirkin, Shachar and Singh, Harsh and Srivastava, Ashay and Hamerlik, Endre and Izzati, Fathinah Asma and Maani, Fadillah Adamsyah and Cavada, Sebastian and Chim, Jenny and Gupta, Rohit and Manjunath, Sanjay and Zhumakhanova, Kamila and Rabevohitra, Feno Heriniaina and Amirudin, Azril Hafizi and Ridzuan, Muhammad and Kareem, Daniya Najiha Abdul and More, Ketan Pravin and Li, Kunyang and Shakya, Pramesh and Saad, Muhammad and Ghasemaghaei, Amirpouya and Djanibekov, Amirbek and Azizov, Dilshod and Jankovic, Branislava and Bhatia, Naman and Cabrera, Alvaro and Obando-Ceron, Johan and Otieno, Olympiah and Farestam, Febian and Rabbani, Muztoba and Ballah, Sanoojan and Sanjeev, Santosh and Shtanchaev, Abduragim and Fatima, Maheen and Nguyen, Thao and Kareem, Amrin and Aremu, Toluwani and Xavier, Nathan Augusto Zacarias and Bhatkal, Amit and Toyin, Hawau Olamide and Chadha, Aman and Cholakkal, Hisham and Anwer, Rao Muhammad and Felsberg, Michael and Laaksonen, Jorma and Solorio, Thamar and Choudhury, Monojit and Laptev, Ivan and Shah, Mubarak and Khan, Salman and Khan, Fahad Shahbaz},
  title     = {All Languages Matter: Evaluating {LMMs} on Culturally Diverse 100 Languages},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19565--19575},
}
SparseAlign: a Fully Sparse Framework for Cooperative Object Detection: Yunshuang Yuan,

Yan Xia,

Daniel Cremers,

Monika Sester; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Yunshuang and Xia, Yan and Cremers, Daniel and Sester, Monika},
  title     = {{SparseAlign}: a Fully Sparse Framework for Cooperative Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22296--22305},
}
Video-Bench: Human-Aligned Video Generation Benchmark: Hui Han,

Siyuan Li,

Jiaqi Chen,

Yiwen Yuan,

Yuling Wu,

Yufan Deng,

Chak Tou Leong,

Hanwen Du,

Junchen Fu,

Youhua Li,

Jie Zhang,

Chi Zhang,

Li-jia Li,

Yongxin Ni; [pdf] [supp]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Hui and Li, Siyuan and Chen, Jiaqi and Yuan, Yiwen and Wu, Yuling and Deng, Yufan and Leong, Chak Tou and Du, Hanwen and Fu, Junchen and Li, Youhua and Zhang, Jie and Zhang, Chi and Li, Li-jia and Ni, Yongxin},
  title     = {{Video-Bench}: Human-Aligned Video Generation Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18858--18868},
}
Data Distributional Properties As Inductive Bias for Systematic Generalization: Felipe del Rio,

Alain Raymond-Saez,

Daniel Florea,

Rodrigo Toro Icarte,

Julio Hurtado,

Cristian B. Calderon,

Alvaro Soto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{del_Rio_2025_CVPR,
  author    = {del Rio, Felipe and Raymond-Saez, Alain and Florea, Daniel and Toro Icarte, Rodrigo and Hurtado, Julio and Calderon, Cristian B. and Soto, Alvaro},
  title     = {Data Distributional Properties As Inductive Bias for Systematic Generalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25590--25601},
}
MergeVQ: A Unified Framework for Visual Generation and Representation with Disentangled Token Merging and Quantization: Siyuan Li,

Luyuan Zhang,

Zedong Wang,

Juanxi Tian,

Cheng Tan,

Zicheng Liu,

Chang Yu,

Qingsong Xie,

Haonan Lu,

Haoqian Wang,

Zhen Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Siyuan and Zhang, Luyuan and Wang, Zedong and Tian, Juanxi and Tan, Cheng and Liu, Zicheng and Yu, Chang and Xie, Qingsong and Lu, Haonan and Wang, Haoqian and Lei, Zhen},
  title     = {{MergeVQ}: A Unified Framework for Visual Generation and Representation with Disentangled Token Merging and Quantization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19713--19723},
}
InterAct: Advancing Large-Scale Versatile 3D Human-Object Interaction Generation: Sirui Xu,

Dongting Li,

Yucheng Zhang,

Xiyan Xu,

Qi Long,

Ziyin Wang,

Yunzhi Lu,

Shuchang Dong,

Hezi Jiang,

Akshat Gupta,

Yu-Xiong Wang,

Liang-Yan Gui; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Sirui and Li, Dongting and Zhang, Yucheng and Xu, Xiyan and Long, Qi and Wang, Ziyin and Lu, Yunzhi and Dong, Shuchang and Jiang, Hezi and Gupta, Akshat and Wang, Yu-Xiong and Gui, Liang-Yan},
  title     = {{InterAct}: Advancing Large-Scale Versatile {3D} Human-Object Interaction Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7048--7060},
}
TopoCellGen: Generating Histopathology Cell Topology with a Diffusion Model: Meilong Xu,

Saumya Gupta,

Xiaoling Hu,

Chen Li,

Shahira Abousamra,

Dimitris Samaras,

Prateek Prasanna,

Chao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Meilong and Gupta, Saumya and Hu, Xiaoling and Li, Chen and Abousamra, Shahira and Samaras, Dimitris and Prasanna, Prateek and Chen, Chao},
  title     = {{TopoCellGen}: Generating Histopathology Cell Topology with a Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20979--20989},
}
Anyattack: Towards Large-scale Self-supervised Adversarial Attacks on Vision-language Models: Jiaming Zhang,

Junhong Ye,

Xingjun Ma,

Yige Li,

Yunfan Yang,

Yunhao Chen,

Jitao Sang,

Dit-Yan Yeung; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jiaming and Ye, Junhong and Ma, Xingjun and Li, Yige and Yang, Yunfan and Chen, Yunhao and Sang, Jitao and Yeung, Dit-Yan},
  title     = {Anyattack: Towards Large-scale Self-supervised Adversarial Attacks on Vision-language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19900--19909},
}
Joint Optimization of Neural Radiance Fields and Continuous Camera Motion from a Monocular Video: Hoang Chuong Nguyen,

Wei Mao,

Jose M. Alvarez,

Miaomiao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Hoang Chuong and Mao, Wei and Alvarez, Jose M. and Liu, Miaomiao},
  title     = {Joint Optimization of Neural Radiance Fields and Continuous Camera Motion from a Monocular Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11472--11481},
}
IRGS: Inter-Reflective Gaussian Splatting with 2D Gaussian Ray Tracing: Chun Gu,

Xiaofei Wei,

Zixuan Zeng,

Yuxuan Yao,

Li Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2025_CVPR,
  author    = {Gu, Chun and Wei, Xiaofei and Zeng, Zixuan and Yao, Yuxuan and Zhang, Li},
  title     = {{IRGS}: Inter-Reflective {Gaussian} Splatting with {2D} {Gaussian} Ray Tracing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10943--10952},
}
InterMimic: Towards Universal Whole-Body Control for Physics-Based Human-Object Interactions: Sirui Xu,

Hung Yu Ling,

Yu-Xiong Wang,

Liang-Yan Gui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Sirui and Ling, Hung Yu and Wang, Yu-Xiong and Gui, Liang-Yan},
  title     = {{InterMimic}: Towards Universal Whole-Body Control for Physics-Based Human-Object Interactions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12266--12277},
}
Meta-Learning Hyperparameters for Parameter Efficient Fine-Tuning: Zichen Tian,

Yaoyao Liu,

Qianru Sun; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Zichen and Liu, Yaoyao and Sun, Qianru},
  title     = {Meta-Learning Hyperparameters for Parameter Efficient Fine-Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23037--23047},
}
TriTex: Learning Texture from a Single Mesh via Triplane Semantic Features: Dana Cohen-Bar,

Daniel Cohen-Or,

Gal Chechik,

Yoni Kasten; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cohen-Bar_2025_CVPR,
  author    = {Cohen-Bar, Dana and Cohen-Or, Daniel and Chechik, Gal and Kasten, Yoni},
  title     = {{TriTex}: Learning Texture from a Single Mesh via Triplane Semantic Features},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21403--21413},
}
Efficient Test-time Adaptive Object Detection via Sensitivity-Guided Pruning: Kunyu Wang,

Xueyang Fu,

Xin Lu,

Chengjie Ge,

Chengzhi Cao,

Wei Zhai,

Zheng-Jun Zha; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Kunyu and Fu, Xueyang and Lu, Xin and Ge, Chengjie and Cao, Chengzhi and Zhai, Wei and Zha, Zheng-Jun},
  title     = {Efficient Test-time Adaptive Object Detection via Sensitivity-Guided Pruning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10577--10586},
}
A Data-Centric Revisit of Pre-Trained Vision Models for Robot Learning: Xin Wen,

Bingchen Zhao,

Yilun Chen,

Jiangmiao Pang,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2025_CVPR,
  author    = {Wen, Xin and Zhao, Bingchen and Chen, Yilun and Pang, Jiangmiao and Qi, Xiaojuan},
  title     = {A Data-Centric Revisit of Pre-Trained Vision Models for Robot Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12143--12154},
}
Visual Agentic AI for Spatial Reasoning with a Dynamic API: Damiano Marsili,

Rohun Agrawal,

Yisong Yue,

Georgia Gkioxari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Marsili_2025_CVPR,
  author    = {Marsili, Damiano and Agrawal, Rohun and Yue, Yisong and Gkioxari, Georgia},
  title     = {Visual Agentic {AI} for Spatial Reasoning with a Dynamic {API}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19446--19455},
}
TAMT: Temporal-Aware Model Tuning for Cross-Domain Few-Shot Action Recognition: Yilong Wang,

Zilin Gao,

Qilong Wang,

Zhaofeng Chen,

Peihua Li,

Qinghua Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yilong and Gao, Zilin and Wang, Qilong and Chen, Zhaofeng and Li, Peihua and Hu, Qinghua},
  title     = {{TAMT}: Temporal-Aware Model Tuning for Cross-Domain Few-Shot Action Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3449--3459},
}
Feature Spectrum Learning for Remote Sensing Change Detection: Qi Zang,

Dong Zhao,

Shuang Wang,

Dou Quan,

Zhun Zhong; [pdf] [supp]
[bibtex]
@InProceedings{Zang_2025_CVPR,
  author    = {Zang, Qi and Zhao, Dong and Wang, Shuang and Quan, Dou and Zhong, Zhun},
  title     = {Feature Spectrum Learning for Remote Sensing Change Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12647--12657},
}
BioX-CPath: Biologically-driven Explainable Diagnostics for Multistain IHC Computational Pathology: Amaya Gallagher-Syed,

Henry Senior,

Omnia Alwazzan,

Elena Pontarini,

Michele Bombardieri,

Costantino Pitzalis,

Myles J. Lewis,

Michael R. Barnes,

Luca Rossi,

Gregory Slabaugh; [pdf] [supp]
[bibtex]
@InProceedings{Gallagher-Syed_2025_CVPR,
  author    = {Gallagher-Syed, Amaya and Senior, Henry and Alwazzan, Omnia and Pontarini, Elena and Bombardieri, Michele and Pitzalis, Costantino and Lewis, Myles J. and Barnes, Michael R. and Rossi, Luca and Slabaugh, Gregory},
  title     = {{BioX-CPath}: Biologically-driven Explainable Diagnostics for Multistain {IHC} Computational Pathology},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10372--10383},
}
DriveDreamer4D: World Models Are Effective Data Machines for 4D Driving Scene Representation: Guosheng Zhao,

Chaojun Ni,

Xiaofeng Wang,

Zheng Zhu,

Xueyang Zhang,

Yida Wang,

Guan Huang,

Xinze Chen,

Boyuan Wang,

Youyi Zhang,

Wenjun Mei,

Xingang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Guosheng and Ni, Chaojun and Wang, Xiaofeng and Zhu, Zheng and Zhang, Xueyang and Wang, Yida and Huang, Guan and Chen, Xinze and Wang, Boyuan and Zhang, Youyi and Mei, Wenjun and Wang, Xingang},
  title     = {{DriveDreamer4D}: World Models Are Effective Data Machines for {4D} Driving Scene Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12015--12026},
}
LoKi: Low-dimensional KAN for Efficient Fine-tuning Image Models: Xuan Cai,

Renjie Pan,

Hua Yang; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Xuan and Pan, Renjie and Yang, Hua},
  title     = {{LoKi}: Low-dimensional {KAN} for Efficient Fine-tuning Image Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14869--14880},
}
Dr. Splat: Directly Referring 3D Gaussian Splatting via Direct Language Embedding Registration: Kim Jun-Seong,

GeonU Kim,

Kim Yu-Ji,

Yu-Chiang Frank Wang,

Jaesung Choe,

Tae-Hyun Oh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jun-Seong_2025_CVPR,
  author    = {Kim, Jun-Seong and Kim, GeonU and Kim, Yu-Ji and Wang, Yu-Chiang Frank and Choe, Jaesung and Oh, Tae-Hyun},
  title     = {Dr. {Splat}: Directly Referring {3D} {Gaussian} Splatting via Direct Language Embedding Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14137--14146},
}
Wavelet and Prototype Augmented Query-based Transformer for Pixel-level Surface Defect Detection: Feng Yan,

Xiaoheng Jiang,

Yang Lu,

Jiale Cao,

Dong Chen,

Mingliang Xu; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Feng and Jiang, Xiaoheng and Lu, Yang and Cao, Jiale and Chen, Dong and Xu, Mingliang},
  title     = {Wavelet and Prototype Augmented Query-based Transformer for Pixel-level Surface Defect Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23860--23869},
}
GauCho: Gaussian Distributions with Cholesky Decomposition for Oriented Object Detection: José Henrique Lima Marques,

Jeffri Murrugarra-Llerena,

Claudio R. Jung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Marques_2025_CVPR,
  author    = {Marques, Jos{\'e} Henrique Lima and Murrugarra-Llerena, Jeffri and Jung, Claudio R.},
  title     = {{GauCho}: {Gaussian} Distributions with {Cholesky} Decomposition for Oriented Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3593--3602},
}
Alignment, Mining and Fusion: Representation Alignment with Hard Negative Mining and Selective Knowledge Fusion for Medical Visual Question Answering: Yuanhao Zou,

Zhaozheng Yin; [pdf] [supp]
[bibtex]
@InProceedings{Zou_2025_CVPR,
  author    = {Zou, Yuanhao and Yin, Zhaozheng},
  title     = {Alignment, Mining and Fusion: Representation Alignment with Hard Negative Mining and Selective Knowledge Fusion for Medical Visual Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29623--29633},
}
No Thing, Nothing: Highlighting Safety-Critical Classes for Robust LiDAR Semantic Segmentation in Adverse Weather: Junsung Park,

Hwijeong Lee,

Inha Kang,

Hyunjung Shim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Junsung and Lee, Hwijeong and Kang, Inha and Shim, Hyunjung},
  title     = {No Thing, Nothing: Highlighting Safety-Critical Classes for Robust {LiDAR} Semantic Segmentation in Adverse Weather},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6690--6699},
}
Mind the Gap: Detecting Black-box Adversarial Attacks in the Making through Query Update Analysis: Jeonghwan Park,

Niall McLaughlin,

Ihsen Alouani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Jeonghwan and McLaughlin, Niall and Alouani, Ihsen},
  title     = {Mind the Gap: Detecting Black-box Adversarial Attacks in the Making through Query Update Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10235--10243},
}
Consistent Normal Orientation for 3D Point Clouds via Least Squares on Delaunay Graph: Rao Fu,

Jianmin Zheng,

Liang Yu; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Rao and Zheng, Jianmin and Yu, Liang},
  title     = {Consistent Normal Orientation for {3D} Point Clouds via Least Squares on {Delaunay} Graph},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16932--16942},
}
GaussianWorld: Gaussian World Model for Streaming 3D Occupancy Prediction: Sicheng Zuo,

Wenzhao Zheng,

Yuanhui Huang,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zuo_2025_CVPR,
  author    = {Zuo, Sicheng and Zheng, Wenzhao and Huang, Yuanhui and Zhou, Jie and Lu, Jiwen},
  title     = {{GaussianWorld}: {Gaussian} World Model for Streaming {3D} Occupancy Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6772--6781},
}
ICP: Immediate Compensation Pruning for Mid-to-high Sparsity: Xin Luo,

Xueming Fu,

Zihang Jiang,

S. Kevin Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Xin and Fu, Xueming and Jiang, Zihang and Zhou, S. Kevin},
  title     = {{ICP}: Immediate Compensation Pruning for Mid-to-high Sparsity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9487--9496},
}
VinaBench: Benchmark for Faithful and Consistent Visual Narratives: Silin Gao,

Sheryl Mathew,

Li Mi,

Sepideh Mamooler,

Mengjie Zhao,

Hiromi Wakaki,

Yuki Mitsufuji,

Syrielle Montariol,

Antoine Bosselut; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Silin and Mathew, Sheryl and Mi, Li and Mamooler, Sepideh and Zhao, Mengjie and Wakaki, Hiromi and Mitsufuji, Yuki and Montariol, Syrielle and Bosselut, Antoine},
  title     = {{VinaBench}: Benchmark for Faithful and Consistent Visual Narratives},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2870--2879},
}
ATA: Adaptive Transformation Agent for Text-Guided Subject-Position Variable Background Inpainting: Yizhe Tang,

Zhimin Sun,

Yuzhen Du,

Ran Yi,

Guangben Lu,

Teng Hu,

Luying Li,

Lizhuang Ma,

Fangyuan Zou; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Yizhe and Sun, Zhimin and Du, Yuzhen and Yi, Ran and Lu, Guangben and Hu, Teng and Li, Luying and Ma, Lizhuang and Zou, Fangyuan},
  title     = {{ATA}: Adaptive Transformation Agent for Text-Guided Subject-Position Variable Background Inpainting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18335--18345},
}
Optimizing for the Shortest Path in Denoising Diffusion Model: Ping Chen,

Xingpeng Zhang,

Zhaoxiang Liu,

Huan Hu,

Xiang Liu,

Kai Wang,

Min Wang,

Yanlin Qian,

Shiguo Lian; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Ping and Zhang, Xingpeng and Liu, Zhaoxiang and Hu, Huan and Liu, Xiang and Wang, Kai and Wang, Min and Qian, Yanlin and Lian, Shiguo},
  title     = {Optimizing for the Shortest Path in Denoising Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18021--18030},
}
Antidote: A Unified Framework for Mitigating LVLM Hallucinations in Counterfactual Presupposition and Object Perception: Yuanchen Wu,

Lu Zhang,

Hang Yao,

Junlong Du,

Ke Yan,

Shouhong Ding,

Yunsheng Wu,

Xiaoqiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Yuanchen and Zhang, Lu and Yao, Hang and Du, Junlong and Yan, Ke and Ding, Shouhong and Wu, Yunsheng and Li, Xiaoqiang},
  title     = {Antidote: A Unified Framework for Mitigating {LVLM} Hallucinations in Counterfactual Presupposition and Object Perception},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14646--14656},
}
Language-Guided Audio-Visual Learning for Long-Term Sports Assessment: Huangbiao Xu,

Xiao Ke,

Huanqi Wu,

Rui Xu,

Yuezhou Li,

Wenzhong Guo; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Huangbiao and Ke, Xiao and Wu, Huanqi and Xu, Rui and Li, Yuezhou and Guo, Wenzhong},
  title     = {Language-Guided Audio-Visual Learning for Long-Term Sports Assessment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23967--23977},
}
Dynamic Pseudo Labeling via Gradient Cutting for High-Low Entropy Exploration: Jae Hyeon Park,

Joo Hyeon Jeon,

Jae Yun Lee,

Sangyeon Ahn,

Min Hee Cha,

Min Geol Kim,

Hyeok Nam,

Sung In Cho; [pdf] [supp]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Jae Hyeon and Jeon, Joo Hyeon and Lee, Jae Yun and Ahn, Sangyeon and Cha, Min Hee and Kim, Min Geol and Nam, Hyeok and Cho, Sung In},
  title     = {Dynamic Pseudo Labeling via Gradient Cutting for High-Low Entropy Exploration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20602--20611},
}
VODiff: Controlling Object Visibility Order in Text-to-Image Generation: Dong Liang,

Jinyuan Jia,

Yuhao Liu,

Zhanghan Ke,

Hongbo Fu,

Rynson W. H. Lau; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Dong and Jia, Jinyuan and Liu, Yuhao and Ke, Zhanghan and Fu, Hongbo and Lau, Rynson W. H.},
  title     = {{VODiff}: Controlling Object Visibility Order in Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18379--18389},
}
Dual Diffusion for Unified Image Generation and Understanding: Zijie Li,

Henry Li,

Yichun Shi,

Amir Barati Farimani,

Yuval Kluger,

Linjie Yang,

Peng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zijie and Li, Henry and Shi, Yichun and Barati Farimani, Amir and Kluger, Yuval and Yang, Linjie and Wang, Peng},
  title     = {Dual Diffusion for Unified Image Generation and Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2779--2790},
}
WeakMCN: Multi-task Collaborative Network for Weakly Supervised Referring Expression Comprehension and Segmentation: Silin Cheng,

Yang Liu,

Xinwei He,

Sebastien Ourselin,

Lei Tan,

Gen Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Silin and Liu, Yang and He, Xinwei and Ourselin, Sebastien and Tan, Lei and Luo, Gen},
  title     = {{WeakMCN}: Multi-task Collaborative Network for Weakly Supervised Referring Expression Comprehension and Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9175--9185},
}
CAD-Llama: Leveraging Large Language Models for Computer-Aided Design Parametric 3D Model Generation: Jiahao Li,

Weijian Ma,

Xueyang Li,

Yunzhong Lou,

Guichun Zhou,

Xiangdong Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jiahao and Ma, Weijian and Li, Xueyang and Lou, Yunzhong and Zhou, Guichun and Zhou, Xiangdong},
  title     = {{CAD-Llama}: Leveraging Large Language Models for Computer-Aided Design Parametric {3D} Model Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18563--18573},
}
Leveraging SD Map to Augment HD Map-based Trajectory Prediction: Zhiwei Dong,

Ran Ding,

Wei Li,

Peng Zhang,

Guobin Tang,

Jia Guo; [pdf]
[bibtex]
@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Zhiwei and Ding, Ran and Li, Wei and Zhang, Peng and Tang, Guobin and Guo, Jia},
  title     = {Leveraging {SD} Map to Augment {HD} Map-based Trajectory Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17219--17228},
}
4DGC: Rate-Aware 4D Gaussian Compression for Efficient Streamable Free-Viewpoint Video: Qiang Hu,

Zihan Zheng,

Houqiang Zhong,

Sihua Fu,

Li Song,

Xiaoyun Zhang,

Guangtao Zhai,

Yanfeng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Qiang and Zheng, Zihan and Zhong, Houqiang and Fu, Sihua and Song, Li and Zhang, Xiaoyun and Zhai, Guangtao and Wang, Yanfeng},
  title     = {{4DGC}: Rate-Aware {4D} {Gaussian} Compression for Efficient Streamable Free-Viewpoint Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {875--885},
}
ONDA-Pose: Occlusion-Aware Neural Domain Adaptation for Self-Supervised 6D Object Pose Estimation: Tao Tan,

Qiulei Dong; [pdf]
[bibtex]
@InProceedings{Tan_2025_CVPR,
  author    = {Tan, Tao and Dong, Qiulei},
  title     = {{ONDA-Pose}: Occlusion-Aware Neural Domain Adaptation for Self-Supervised {6D} Object Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16829--16838},
}
Teller: Real-Time Streaming Audio-Driven Portrait Animation with Autoregressive Motion Generation: Dingcheng Zhen,

Shunshun Yin,

Shiyang Qin,

Hou Yi,

Ziwei Zhang,

Siyuan Liu,

Gan Qi,

Ming Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhen_2025_CVPR,
  author    = {Zhen, Dingcheng and Yin, Shunshun and Qin, Shiyang and Yi, Hou and Zhang, Ziwei and Liu, Siyuan and Qi, Gan and Tao, Ming},
  title     = {Teller: Real-Time Streaming Audio-Driven Portrait Animation with Autoregressive Motion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21075--21085},
}
GASP: Gaussian Avatars with Synthetic Priors: Jack Saunders,

Charlie Hewitt,

Yanan Jian,

Marek Kowalski,

Tadas Baltrusaitis,

Yiye Chen,

Darren Cosker,

Virginia Estellers,

Nicholas Gydé,

Vinay P. Namboodiri,

Benjamin E. Lundell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saunders_2025_CVPR,
  author    = {Saunders, Jack and Hewitt, Charlie and Jian, Yanan and Kowalski, Marek and Baltrusaitis, Tadas and Chen, Yiye and Cosker, Darren and Estellers, Virginia and Gyd{\'e}, Nicholas and Namboodiri, Vinay P. and Lundell, Benjamin E.},
  title     = {{GASP}: {Gaussian} Avatars with Synthetic Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {271--280},
}
Q-PART: Quasi-Periodic Adaptive Regression with Test-time Training for Pediatric Left Ventricular Ejection Fraction Regression: Jie Liu,

Tiexin Qin,

Hui Liu,

Yilei Shi,

Lichao Mou,

Xiao Xiang Zhu,

Shiqi Wang,

Haoliang Li; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jie and Qin, Tiexin and Liu, Hui and Shi, Yilei and Mou, Lichao and Zhu, Xiao Xiang and Wang, Shiqi and Li, Haoliang},
  title     = {{Q-PART}: Quasi-Periodic Adaptive Regression with Test-time Training for Pediatric Left Ventricular Ejection Fraction Regression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15560--15569},
}
Composing Parts for Expressive Object Generation: Harsh Rangwani,

Aishwarya Agarwal,

Kuldeep Kulkarni,

R. Venkatesh Babu,

Srikrishna Karanam; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rangwani_2025_CVPR,
  author    = {Rangwani, Harsh and Agarwal, Aishwarya and Kulkarni, Kuldeep and Babu, R. Venkatesh and Karanam, Srikrishna},
  title     = {Composing Parts for Expressive Object Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13209--13219},
}
CarPlanner: Consistent Auto-regressive Trajectory Planning for Large-Scale Reinforcement Learning in Autonomous Driving: Dongkun Zhang,

Jiaming Liang,

Ke Guo,

Sha Lu,

Qi Wang,

Rong Xiong,

Zhenwei Miao,

Yue Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Dongkun and Liang, Jiaming and Guo, Ke and Lu, Sha and Wang, Qi and Xiong, Rong and Miao, Zhenwei and Wang, Yue},
  title     = {{CarPlanner}: Consistent Auto-regressive Trajectory Planning for Large-Scale Reinforcement Learning in Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17239--17248},
}
Apply Hierarchical-Chain-of-Generation to Complex Attributes Text-to-3D Generation: Yiming Qin,

Zhu Xu,

Yang Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Qin_2025_CVPR,
  author    = {Qin, Yiming and Xu, Zhu and Liu, Yang},
  title     = {Apply Hierarchical-Chain-of-Generation to Complex Attributes Text-to-{3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18521--18530},
}
PersonaHOI: Effortlessly Improving Face Personalization in Human-Object Interaction Generation: Xinting Hu,

Haoran Wang,

Jan Eric Lenssen,

Bernt Schiele; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Xinting and Wang, Haoran and Lenssen, Jan Eric and Schiele, Bernt},
  title     = {{PersonaHOI}: Effortlessly Improving Face Personalization in Human-Object Interaction Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23775--23784},
}
Video-Panda: Parameter-efficient Alignment for Encoder-free Video-Language Models: Jinhui Yi,

Syed Talal Wasim,

Yanan Luo,

Muzammal Naseer,

Juergen Gall; [pdf] [supp]
[bibtex]
@InProceedings{Yi_2025_CVPR,
  author    = {Yi, Jinhui and Wasim, Syed Talal and Luo, Yanan and Naseer, Muzammal and Gall, Juergen},
  title     = {{Video-Panda}: Parameter-efficient Alignment for Encoder-free Video-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24119--24128},
}
COSMIC: Clique-Oriented Semantic Multi-space Integration for Robust CLIP Test-Time Adaptation: Fanding Huang,

Jingyan Jiang,

Qinting Jiang,

Hebei Li,

Faisal Nadeem Khan,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Fanding and Jiang, Jingyan and Jiang, Qinting and Li, Hebei and Khan, Faisal Nadeem and Wang, Zhi},
  title     = {{COSMIC}: Clique-Oriented Semantic Multi-space Integration for Robust {CLIP} Test-Time Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9772--9781},
}
MonoSplat: Generalizable 3D Gaussian Splatting from Monocular Depth Foundation Models: Yifan Liu,

Keyu Fan,

Weihao Yu,

Chenxin Li,

Hao Lu,

Yixuan Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yifan and Fan, Keyu and Yu, Weihao and Li, Chenxin and Lu, Hao and Yuan, Yixuan},
  title     = {{MonoSplat}: Generalizable {3D} {Gaussian} Splatting from Monocular Depth Foundation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21570--21579},
}
Hybrid Global-Local Representation with Augmented Spatial Guidance for Zero-Shot Referring Image Segmentation: Ting Liu,

Siyuan Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Ting and Li, Siyuan},
  title     = {Hybrid Global-Local Representation with Augmented Spatial Guidance for Zero-Shot Referring Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29634--29643},
}
SOLVE: Synergy of Language-Vision and End-to-End Networks for Autonomous Driving: Xuesong Chen,

Linjiang Huang,

Tao Ma,

Rongyao Fang,

Shaoshuai Shi,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Xuesong and Huang, Linjiang and Ma, Tao and Fang, Rongyao and Shi, Shaoshuai and Li, Hongsheng},
  title     = {{SOLVE}: Synergy of Language-Vision and End-to-End Networks for Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12068--12077},
}
Shift the Lens: Environment-Aware Unsupervised Camouflaged Object Detection: Ji Du,

Fangwei Hao,

Mingyang Yu,

Desheng Kong,

Jiesheng Wu,

Bin Wang,

Jing Xu,

Ping Li; [pdf] [supp]
[bibtex]
@InProceedings{Du_2025_CVPR,
  author    = {Du, Ji and Hao, Fangwei and Yu, Mingyang and Kong, Desheng and Wu, Jiesheng and Wang, Bin and Xu, Jing and Li, Ping},
  title     = {Shift the Lens: Environment-Aware Unsupervised Camouflaged Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19271--19282},
}
Probability Density Geodesics in Image Diffusion Latent Space: Qingtao Yu,

Jaskirat Singh,

Zhaoyuan Yang,

Peter Henry Tu,

Jing Zhang,

Hongdong Li,

Richard Hartley,

Dylan Campbell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Qingtao and Singh, Jaskirat and Yang, Zhaoyuan and Tu, Peter Henry and Zhang, Jing and Li, Hongdong and Hartley, Richard and Campbell, Dylan},
  title     = {Probability Density Geodesics in Image Diffusion Latent Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27989--27998},
}
High-quality Point Cloud Oriented Normal Estimation via Hybrid Angular and Euclidean Distance Encoding: Yuanqi Li,

Jingcheng Huang,

Hongshen Wang,

Peiyuan Lv,

Yansong Liu,

Jiuming Zheng,

Jie Guo,

Yanwen Guo; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yuanqi and Huang, Jingcheng and Wang, Hongshen and Lv, Peiyuan and Liu, Yansong and Zheng, Jiuming and Guo, Jie and Guo, Yanwen},
  title     = {High-quality Point Cloud Oriented Normal Estimation via Hybrid Angular and {Euclidean} Distance Encoding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1287--1296},
}
DriveScape: High-Resolution Driving Video Generation by Multi-View Feature Fusion: Wei Wu,

Xi Guo,

Weixuan Tang,

Tingxuan Huang,

Chiyu Wang,

Chenjing Ding; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Wei and Guo, Xi and Tang, Weixuan and Huang, Tingxuan and Wang, Chiyu and Ding, Chenjing},
  title     = {{DriveScape}: High-Resolution Driving Video Generation by Multi-View Feature Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17187--17196},
}
Training-free Neural Architecture Search through Variance of Knowledge of Deep Network Weights: Ondrej Tybl,

Lukas Neumann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tybl_2025_CVPR,
  author    = {Tybl, Ondrej and Neumann, Lukas},
  title     = {Training-free Neural Architecture Search through Variance of Knowledge of Deep Network Weights},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14881--14890},
}
Every SAM Drop Counts: Embracing Semantic Priors for Multi-Modality Image Fusion and Beyond: Guanyao Wu,

Haoyu Liu,

Hongming Fu,

Yichuan Peng,

Jinyuan Liu,

Xin Fan,

Risheng Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Guanyao and Liu, Haoyu and Fu, Hongming and Peng, Yichuan and Liu, Jinyuan and Fan, Xin and Liu, Risheng},
  title     = {Every {SAM} Drop Counts: Embracing Semantic Priors for Multi-Modality Image Fusion and Beyond},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17882--17891},
}
EgoLife: Towards Egocentric Life Assistant: Jingkang Yang,

Shuai Liu,

Hongming Guo,

Yuhao Dong,

Xiamengwei Zhang,

Sicheng Zhang,

Pengyun Wang,

Zitang Zhou,

Binzhu Xie,

Ziyue Wang,

Bei Ouyang,

Zhengyu Lin,

Marco Cominelli,

Zhongang Cai,

Bo Li,

Yuanhan Zhang,

Peiyuan Zhang,

Fangzhou Hong,

Joerg Widmer,

Francesco Gringoli,

Lei Yang,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jingkang and Liu, Shuai and Guo, Hongming and Dong, Yuhao and Zhang, Xiamengwei and Zhang, Sicheng and Wang, Pengyun and Zhou, Zitang and Xie, Binzhu and Wang, Ziyue and Ouyang, Bei and Lin, Zhengyu and Cominelli, Marco and Cai, Zhongang and Li, Bo and Zhang, Yuanhan and Zhang, Peiyuan and Hong, Fangzhou and Widmer, Joerg and Gringoli, Francesco and Yang, Lei and Liu, Ziwei},
  title     = {{EgoLife}: Towards Egocentric Life Assistant},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28885--28900},
}
RAEncoder: A Label-Free Reversible Adversarial Examples Encoder for Dataset Intellectual Property Protection: Fan Xing,

Zhuo Tian,

Xuefeng Fan,

Xiaoyi Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Fan and Tian, Zhuo and Fan, Xuefeng and Zhou, Xiaoyi},
  title     = {{RAEncoder}: A Label-Free Reversible Adversarial Examples Encoder for Dataset Intellectual Property Protection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20665--20674},
}
BrepGiff: Lightweight Generation of Complex B-rep with 3D GAT Diffusion: Hao Guo,

Xiaoshui Huang,

Hao Jiacheng,

Yunpeng Bai,

Hongping Gan,

Yilei Shi; [pdf]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Hao and Huang, Xiaoshui and Jiacheng, Hao and Bai, Yunpeng and Gan, Hongping and Shi, Yilei},
  title     = {{BrepGiff}: Lightweight Generation of Complex {B-rep} with {3D} {GAT} Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26587--26596},
}
Towards Fine-Grained Interpretability: Counterfactual Explanations for Misclassification with Saliency Partition: Lintong Zhang,

Kang Yin,

Seong-Whan Lee; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Lintong and Yin, Kang and Lee, Seong-Whan},
  title     = {Towards Fine-Grained Interpretability: Counterfactual Explanations for Misclassification with Saliency Partition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30053--30062},
}
Prior-free 3D Object Tracking: Xiuqiang Song,

Li Jin,

Zhengxian Zhang,

Jiachen Li,

Fan Zhong,

Guofeng Zhang,

Xueying Qin; [pdf] [supp]
[bibtex]
@InProceedings{Song_2025_CVPR,
  author    = {Song, Xiuqiang and Jin, Li and Zhang, Zhengxian and Li, Jiachen and Zhong, Fan and Zhang, Guofeng and Qin, Xueying},
  title     = {Prior-free {3D} Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1200--1209},
}
LatentHOI: On the Generalizable Hand Object Motion Generation with Latent Hand Diffusion: Muchen Li,

Sammy Christen,

Chengde Wan,

Yujun Cai,

Renjie Liao,

Leonid Sigal,

Shugao Ma; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Muchen and Christen, Sammy and Wan, Chengde and Cai, Yujun and Liao, Renjie and Sigal, Leonid and Ma, Shugao},
  title     = {{LatentHOI}: On the Generalizable Hand Object Motion Generation with Latent Hand Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17416--17425},
}
Progressive Correspondence Regenerator for Robust 3D Registration: Guiyu Zhao,

Sheng Ao,

Ye Zhang,

Kai Xu,

Yulan Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Guiyu and Ao, Sheng and Zhang, Ye and Xu, Kai and Guo, Yulan},
  title     = {Progressive Correspondence Regenerator for Robust {3D} Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1210--1219},
}
Cross-Modal 3D Representation with Multi-View Images and Point Clouds: Ziyang Zhou,

Pinghui Wang,

Zi Liang,

Haitao Bai,

Ruofei Zhang; [pdf]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Ziyang and Wang, Pinghui and Liang, Zi and Bai, Haitao and Zhang, Ruofei},
  title     = {Cross-Modal {3D} Representation with Multi-View Images and Point Clouds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3728--3739},
}
Chapter-Llama: Efficient Chaptering in Hour-Long Videos with LLMs: Lucas Ventura,

Antoine Yang,

Cordelia Schmid,

Gül Varol; [pdf] [supp]
[bibtex]
@InProceedings{Ventura_2025_CVPR,
  author    = {Ventura, Lucas and Yang, Antoine and Schmid, Cordelia and Varol, G{\"u}l},
  title     = {{Chapter-Llama}: Efficient Chaptering in Hour-Long Videos with {LLMs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18947--18958},
}
Decompositional Neural Scene Reconstruction with Generative Diffusion Prior: Junfeng Ni,

Yu Liu,

Ruijie Lu,

Zirui Zhou,

Song-Chun Zhu,

Yixin Chen,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2025_CVPR,
  author    = {Ni, Junfeng and Liu, Yu and Lu, Ruijie and Zhou, Zirui and Zhu, Song-Chun and Chen, Yixin and Huang, Siyuan},
  title     = {Decompositional Neural Scene Reconstruction with Generative Diffusion Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6022--6033},
}
Distribution Prototype Diffusion Learning for Open-set Supervised Anomaly Detection: Fuyun Wang,

Tong Zhang,

Yuanzhi Wang,

Yide Qiu,

Xin Liu,

Xu Guo,

Zhen Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Fuyun and Zhang, Tong and Wang, Yuanzhi and Qiu, Yide and Liu, Xin and Guo, Xu and Cui, Zhen},
  title     = {Distribution Prototype Diffusion Learning for Open-set Supervised Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20416--20426},
}
Learning Visual Generative Priors without Text: Shuailei Ma,

Kecheng Zheng,

Ying Wei,

Wei Wu,

Fan Lu,

Yifei Zhang,

Chen-Wei Xie,

Biao Gong,

Jiapeng Zhu,

Yujun Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Shuailei and Zheng, Kecheng and Wei, Ying and Wu, Wei and Lu, Fan and Zhang, Yifei and Xie, Chen-Wei and Gong, Biao and Zhu, Jiapeng and Shen, Yujun},
  title     = {Learning Visual Generative Priors without Text},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8051--8061},
}
Joint Scheduling of Causal Prompts and Tasks for Multi-Task Learning: Chaoyang Li,

Jianyang Qin,

Jinhao Cui,

Zeyu Liu,

Ning Hu,

Qing Liao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Chaoyang and Qin, Jianyang and Cui, Jinhao and Liu, Zeyu and Hu, Ning and Liao, Qing},
  title     = {Joint Scheduling of Causal Prompts and Tasks for Multi-Task Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25124--25134},
}
Full-DoF Egomotion Estimation for Event Cameras Using Geometric Solvers: Ji Zhao,

Banglei Guan,

Zibin Liu,

Laurent Kneip; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Ji and Guan, Banglei and Liu, Zibin and Kneip, Laurent},
  title     = {Full-{DoF} Egomotion Estimation for Event Cameras Using Geometric Solvers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11515--11524},
}
3DGUT: Enabling Distorted Cameras and Secondary Rays in Gaussian Splatting: Qi Wu,

Janick Martinez Esturo,

Ashkan Mirzaei,

Nicolas Moënne-Loccoz,

Zan Gojcic; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Qi and Esturo, Janick Martinez and Mirzaei, Ashkan and Mo{\"e}nne-Loccoz, Nicolas and Gojcic, Zan},
  title     = {{3DGUT}: Enabling Distorted Cameras and Secondary Rays in {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26036--26046},
}
Teaching Large Language Models to Regress Accurate Image Quality Scores Using Score Distribution: Zhiyuan You,

Xin Cai,

Jinjin Gu,

Tianfan Xue,

Chao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2025_CVPR,
  author    = {You, Zhiyuan and Cai, Xin and Gu, Jinjin and Xue, Tianfan and Dong, Chao},
  title     = {Teaching Large Language Models to Regress Accurate Image Quality Scores Using Score Distribution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14483--14494},
}
Lifting the Veil on Visual Information Flow in MLLMs: Unlocking Pathways to Faster Inference: Hao Yin,

Guangzong Si,

Zilei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Hao and Si, Guangzong and Wang, Zilei},
  title     = {Lifting the Veil on Visual Information Flow in {MLLMs}: Unlocking Pathways to Faster Inference},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9382--9391},
}
Your Scale Factors are My Weapon: Targeted Bit-Flip Attacks on Vision Transformers via Scale Factor Manipulation: Jialai Wang,

Yuxiao Wu,

Weiye Xu,

Yating Huang,

Chao Zhang,

Zongpeng Li,

Mingwei Xu,

Zhenkai Liang; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jialai and Wu, Yuxiao and Xu, Weiye and Huang, Yating and Zhang, Chao and Li, Zongpeng and Xu, Mingwei and Liang, Zhenkai},
  title     = {Your Scale Factors are My Weapon: Targeted Bit-Flip Attacks on Vision Transformers via Scale Factor Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20103--20112},
}
Marten: Visual Question Answering with Mask Generation for Multi-modal Document Understanding: Zining Wang,

Tongkun Guan,

Pei Fu,

Chen Duan,

Qianyi Jiang,

Zhentao Guo,

Shan Guo,

Junfeng Luo,

Wei Shen,

Xiaokang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zining and Guan, Tongkun and Fu, Pei and Duan, Chen and Jiang, Qianyi and Guo, Zhentao and Guo, Shan and Luo, Junfeng and Shen, Wei and Yang, Xiaokang},
  title     = {Marten: Visual Question Answering with Mask Generation for Multi-modal Document Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14460--14471},
}
Mamba-Reg: Vision Mamba Also Needs Registers: Feng Wang,

Jiahao Wang,

Sucheng Ren,

Guoyizhe Wei,

Jieru Mei,

Wei Shao,

Yuyin Zhou,

Alan Yuille,

Cihang Xie; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Feng and Wang, Jiahao and Ren, Sucheng and Wei, Guoyizhe and Mei, Jieru and Shao, Wei and Zhou, Yuyin and Yuille, Alan and Xie, Cihang},
  title     = {{Mamba-Reg}: Vision {Mamba} Also Needs Registers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14944--14953},
}
It's a (Blind) Match! Towards Vision-Language Correspondence without Parallel Data: Dominik Schnaus,

Nikita Araslanov,

Daniel Cremers; [pdf] [supp]
[bibtex]
@InProceedings{Schnaus_2025_CVPR,
  author    = {Schnaus, Dominik and Araslanov, Nikita and Cremers, Daniel},
  title     = {It's a (Blind) Match! Towards Vision-Language Correspondence without Parallel Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24983--24992},
}
Open Set Label Shift with Test Time Out-of-Distribution Reference: Changkun Ye,

Russell Tsuchida,

Lars Petersson,

Nick Barnes; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Changkun and Tsuchida, Russell and Petersson, Lars and Barnes, Nick},
  title     = {Open Set Label Shift with Test Time Out-of-Distribution Reference},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30619--30629},
}
Visual Persona: Foundation Model for Full-Body Human Customization: Jisu Nam,

Soowon Son,

Zhan Xu,

Jing Shi,

Difan Liu,

Feng Liu,

Seungryong Kim,

Yang Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2025_CVPR,
  author    = {Nam, Jisu and Son, Soowon and Xu, Zhan and Shi, Jing and Liu, Difan and Liu, Feng and Kim, Seungryong and Zhou, Yang},
  title     = {Visual Persona: Foundation Model for Full-Body Human Customization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18630--18641},
}
SOGS: Second-Order Anchor for Advanced 3D Gaussian Splatting: Jiahui Zhang,

Fangneng Zhan,

Ling Shao,

Shijian Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jiahui and Zhan, Fangneng and Shao, Ling and Lu, Shijian},
  title     = {{SOGS}: Second-Order Anchor for Advanced {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11167--11176},
}
GaussianFormer-2: Probabilistic Gaussian Superposition for Efficient 3D Occupancy Prediction: Yuanhui Huang,

Amonnut Thammatadatrakoon,

Wenzhao Zheng,

Yunpeng Zhang,

Dalong Du,

Jiwen Lu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Yuanhui and Thammatadatrakoon, Amonnut and Zheng, Wenzhao and Zhang, Yunpeng and Du, Dalong and Lu, Jiwen},
  title     = {{GaussianFormer-2}: Probabilistic {Gaussian} Superposition for Efficient {3D} Occupancy Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27477--27486},
}
MExD: An Expert-Infused Diffusion Model for Whole-Slide Image Classification: Jianwei Zhao,

Xin Li,

Fan Yang,

Qiang Zhai,

Ao Luo,

Yang Zhao,

Hong Cheng,

Huazhu Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Jianwei and Li, Xin and Yang, Fan and Zhai, Qiang and Luo, Ao and Zhao, Yang and Cheng, Hong and Fu, Huazhu},
  title     = {{MExD}: An Expert-Infused Diffusion Model for Whole-Slide Image Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20789--20799},
}
Flexible Frame Selection for Efficient Video Reasoning: Shyamal Buch,

Arsha Nagrani,

Anurag Arnab,

Cordelia Schmid; [pdf] [supp]
[bibtex]
@InProceedings{Buch_2025_CVPR,
  author    = {Buch, Shyamal and Nagrani, Arsha and Arnab, Anurag and Schmid, Cordelia},
  title     = {Flexible Frame Selection for Efficient Video Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29071--29082},
}
EventGPT: Event Stream Understanding with Multimodal Large Language Models: Shaoyu Liu,

Jianing Li,

Guanghui Zhao,

Yunjian Zhang,

Xin Meng,

Fei Richard Yu,

Xiangyang Ji,

Ming Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Shaoyu and Li, Jianing and Zhao, Guanghui and Zhang, Yunjian and Meng, Xin and Yu, Fei Richard and Ji, Xiangyang and Li, Ming},
  title     = {{EventGPT}: Event Stream Understanding with Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29139--29149},
}
Pixel-level and Semantic-level Adjustable Super-resolution: A Dual-LoRA Approach: Lingchen Sun,

Rongyuan Wu,

Zhiyuan Ma,

Shuaizheng Liu,

Qiaosi Yi,

Lei Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Lingchen and Wu, Rongyuan and Ma, Zhiyuan and Liu, Shuaizheng and Yi, Qiaosi and Zhang, Lei},
  title     = {Pixel-level and Semantic-level Adjustable Super-resolution: A Dual-{LoRA} Approach},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2333--2343},
}
Let Samples Speak: Mitigating Spurious Correlation by Exploiting the Clusterness of Samples: Weiwei Li,

Junzhuo Liu,

Yuanyuan Ren,

Yuchen Zheng,

Yahao Liu,

Wen Li; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Weiwei and Liu, Junzhuo and Ren, Yuanyuan and Zheng, Yuchen and Liu, Yahao and Li, Wen},
  title     = {Let Samples Speak: Mitigating Spurious Correlation by Exploiting the Clusterness of Samples},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15486--15496},
}
Mr. DETR: Instructive Multi-Route Training for Detection Transformers: Chang-Bin Zhang,

Yujie Zhong,

Kai Han; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Chang-Bin and Zhong, Yujie and Han, Kai},
  title     = {Mr. {DETR}: Instructive Multi-Route Training for Detection Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9933--9943},
}
MITracker: Multi-View Integration for Visual Object Tracking: Mengjie Xu,

Yitao Zhu,

Haotian Jiang,

Jiaming Li,

Zhenrong Shen,

Sheng Wang,

Haolin Huang,

Xinyu Wang,

Han Zhang,

Qing Yang,

Qian Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Mengjie and Zhu, Yitao and Jiang, Haotian and Li, Jiaming and Shen, Zhenrong and Wang, Sheng and Huang, Haolin and Wang, Xinyu and Zhang, Han and Yang, Qing and Wang, Qian},
  title     = {{MITracker}: Multi-View Integration for Visual Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27176--27185},
}
Hearing Hands: Generating Sounds from Physical Interactions in 3D Scenes: Yiming Dou,

Wonseok Oh,

Yuqing Luo,

Antonio Loquercio,

Andrew Owens; [pdf] [arXiv]
[bibtex]
@InProceedings{Dou_2025_CVPR,
  author    = {Dou, Yiming and Oh, Wonseok and Luo, Yuqing and Loquercio, Antonio and Owens, Andrew},
  title     = {Hearing Hands: Generating Sounds from Physical Interactions in {3D} Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1795--1804},
}
AirRoom: Objects Matter in Room Reidentification: Runmao Yao,

Yi Du,

Zhuoqun Chen,

Haoze Zheng,

Chen Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2025_CVPR,
  author    = {Yao, Runmao and Du, Yi and Chen, Zhuoqun and Zheng, Haoze and Wang, Chen},
  title     = {{AirRoom}: Objects Matter in Room Reidentification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1385--1394},
}
Not Only Text: Exploring Compositionality of Visual Representations in Vision-Language Models: Davide Berasi,

Matteo Farina,

Massimiliano Mancini,

Elisa Ricci,

Nicola Strisciuglio; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Berasi_2025_CVPR,
  author    = {Berasi, Davide and Farina, Matteo and Mancini, Massimiliano and Ricci, Elisa and Strisciuglio, Nicola},
  title     = {Not Only Text: Exploring Compositionality of Visual Representations in Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24917--24927},
}
DefMamba: Deformable Visual State Space Model: Leiye Liu,

Miao Zhang,

Jihao Yin,

Tingwei Liu,

Wei Ji,

Yongri Piao,

Huchuan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Leiye and Zhang, Miao and Yin, Jihao and Liu, Tingwei and Ji, Wei and Piao, Yongri and Lu, Huchuan},
  title     = {{DefMamba}: Deformable Visual State Space Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8838--8847},
}
HOP: Heterogeneous Topology-based Multimodal Entanglement for Co-Speech Gesture Generation: Hongye Cheng,

Tianyu Wang,

Guangsi Shi,

Zexing Zhao,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Hongye and Wang, Tianyu and Shi, Guangsi and Zhao, Zexing and Fu, Yanwei},
  title     = {{HOP}: Heterogeneous Topology-based Multimodal Entanglement for Co-Speech Gesture Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {906--916},
}
VoxelSplat: Dynamic Gaussian Splatting as an Effective Loss for Occupancy and Flow Prediction: Ziyue Zhu,

Shenlong Wang,

Jin Xie,

Jiang-jiang Liu,

Jingdong Wang,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Ziyue and Wang, Shenlong and Xie, Jin and Liu, Jiang-jiang and Wang, Jingdong and Yang, Jian},
  title     = {{VoxelSplat}: Dynamic {Gaussian} Splatting as an Effective Loss for Occupancy and Flow Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6761--6771},
}
Exploring Scene Affinity for Semi-Supervised LiDAR Semantic Segmentation: Chuandong Liu,

Xingxing Weng,

Shuguo Jiang,

Pengcheng Li,

Lei Yu,

Gui-Song Xia; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Chuandong and Weng, Xingxing and Jiang, Shuguo and Li, Pengcheng and Yu, Lei and Xia, Gui-Song},
  title     = {Exploring Scene Affinity for Semi-Supervised {LiDAR} Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27380--27389},
}
ControlFace: Harnessing Facial Parametric Control for Face Rigging: Wooseok Jang,

Youngjun Hong,

Geonho Cha,

Seungryong Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2025_CVPR,
  author    = {Jang, Wooseok and Hong, Youngjun and Cha, Geonho and Kim, Seungryong},
  title     = {{ControlFace}: Harnessing Facial Parametric Control for Face Rigging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5614--5624},
}
Minority-Focused Text-to-Image Generation via Prompt Optimization: Soobin Um,

Jong Chul Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Um_2025_CVPR,
  author    = {Um, Soobin and Ye, Jong Chul},
  title     = {Minority-Focused Text-to-Image Generation via Prompt Optimization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20926--20936},
}
Imputation-free and Alignment-free: Incomplete Multi-view Clustering Driven by Consensus Semantic Learning: Yuzhuo Dai,

Jiaqi Jin,

Zhibin Dong,

Siwei Wang,

Xinwang Liu,

En Zhu,

Xihong Yang,

Xinbiao Gan,

Yu Feng; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2025_CVPR,
    author    = {Dai, Yuzhuo and Jin, Jiaqi and Dong, Zhibin and Wang, Siwei and Liu, Xinwang and Zhu, En and Yang, Xihong and Gan, Xinbiao and Feng, Yu},
    title     = {Imputation-free and Alignment-free: Incomplete Multi-view Clustering Driven by Consensus Semantic Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5071--5081},
}
Sensitivity-Aware Efficient Fine-Tuning via Compact Dynamic-Rank Adaptation: Tianran Chen,

Jiarui Chen,

Baoquan Zhang,

Zhehao Yu,

Shidong Chen,

Rui Ye,

Xutao Li,

Yunming Ye; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Tianran and Chen, Jiarui and Zhang, Baoquan and Yu, Zhehao and Chen, Shidong and Ye, Rui and Li, Xutao and Ye, Yunming},
    title     = {Sensitivity-Aware Efficient Fine-Tuning via Compact Dynamic-Rank Adaptation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9655--9664},
}
MANTA: A Large-Scale Multi-View and Visual-Text Anomaly Detection Dataset for Tiny Objects: Lei Fan,

Dongdong Fan,

Zhiguang Hu,

Yiwen Ding,

Donglin Di,

Kai Yi,

Maurice Pagnucco,

Yang Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_CVPR,
    author    = {Fan, Lei and Fan, Dongdong and Hu, Zhiguang and Ding, Yiwen and Di, Donglin and Yi, Kai and Pagnucco, Maurice and Song, Yang},
    title     = {{MANTA}: A Large-Scale Multi-View and Visual-Text Anomaly Detection Dataset for Tiny Objects},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {25518--25527},
}
MoDec-GS: Global-to-Local Motion Decomposition and Temporal Interval Adjustment for Compact Dynamic 3D Gaussian Splatting: Sangwoon Kwak,

Joonsoo Kim,

Jun Young Jeong,

Won-Sik Cheong,

Jihyong Oh,

Munchurl Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kwak_2025_CVPR,
    author    = {Kwak, Sangwoon and Kim, Joonsoo and Jeong, Jun Young and Cheong, Won-Sik and Oh, Jihyong and Kim, Munchurl},
    title     = {{MoDec-GS}: Global-to-Local Motion Decomposition and Temporal Interval Adjustment for Compact Dynamic {3D} {Gaussian} Splatting},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {11338--11348},
}
DaCapo: Score Distillation as Stacked Bridge for Fast and High-quality 3D Editing: Yufei Huang,

Bangyan Liao,

Yuqi Hu,

Haitao Lin,

Lirong Wu,

Siyuan Li,

Cheng Tan,

Zicheng Liu,

Yunfan Liu,

Zelin Zang,

Chang Yu,

Zhen Lei; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
    author    = {Huang, Yufei and Liao, Bangyan and Hu, Yuqi and Lin, Haitao and Wu, Lirong and Li, Siyuan and Tan, Cheng and Liu, Zicheng and Liu, Yunfan and Zang, Zelin and Yu, Chang and Lei, Zhen},
    title     = {{DaCapo}: Score Distillation as Stacked Bridge for Fast and High-quality {3D} Editing},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {16304--16313},
}
A Selective Re-learning Mechanism for Hyperspectral Fusion Imaging: Yuanye Liu,

Jinyang Liu,

Renwei Dian,

Shutao Li; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Yuanye and Liu, Jinyang and Dian, Renwei and Li, Shutao},
    title     = {A Selective Re-learning Mechanism for Hyperspectral Fusion Imaging},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {7437--7446},
}
SCSegamba: Lightweight Structure-Aware Vision Mamba for Crack Segmentation in Structures: Hui Liu,

Chen Jia,

Fan Shi,

Xu Cheng,

Shengyong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Hui and Jia, Chen and Shi, Fan and Cheng, Xu and Chen, Shengyong},
    title     = {{SCSegamba}: Lightweight Structure-Aware Vision {Mamba} for Crack Segmentation in Structures},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {29406--29416},
}
Autoregressive Sequential Pretraining for Visual Tracking: Shiyi Liang,

Yifan Bai,

Yihong Gong,

Xing Wei; [pdf]
[bibtex]
@InProceedings{Liang_2025_CVPR,
    author    = {Liang, Shiyi and Bai, Yifan and Gong, Yihong and Wei, Xing},
    title     = {Autoregressive Sequential Pretraining for Visual Tracking},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {7254--7264},
}
Number it: Temporal Grounding Videos like Flipping Manga: Yongliang Wu,

Xinting Hu,

Yuyang Sun,

Yizhou Zhou,

Wenbo Zhu,

Fengyun Rao,

Bernt Schiele,

Xu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
    author    = {Wu, Yongliang and Hu, Xinting and Sun, Yuyang and Zhou, Yizhou and Zhu, Wenbo and Rao, Fengyun and Schiele, Bernt and Yang, Xu},
    title     = {Number it: Temporal Grounding Videos like Flipping Manga},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {13754--13765},
}
Collaborative Tree Search for Enhancing Embodied Multi-Agent Collaboration: Lizheng Zu,

Lin Lin,

Song Fu,

Na Zhao,

Pan Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Zu_2025_CVPR,
    author    = {Zu, Lizheng and Lin, Lin and Fu, Song and Zhao, Na and Zhou, Pan},
    title     = {Collaborative Tree Search for Enhancing Embodied Multi-Agent Collaboration},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {29513--29522},
}
PromptHMR: Promptable Human Mesh Recovery: Yufu Wang,

Yu Sun,

Priyanka Patel,

Kostas Daniilidis,

Michael J. Black,

Muhammed Kocabas; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Yufu and Sun, Yu and Patel, Priyanka and Daniilidis, Kostas and Black, Michael J. and Kocabas, Muhammed},
    title     = {{PromptHMR}: Promptable Human Mesh Recovery},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1148--1159},
}
SyncVP: Joint Diffusion for Synchronous Multi-Modal Video Prediction: Enrico Pallotta,

Sina Mokhtarzadeh Azar,

Shuai Li,

Olga Zatsarynna,

Juergen Gall; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pallotta_2025_CVPR,
    author    = {Pallotta, Enrico and Azar, Sina Mokhtarzadeh and Li, Shuai and Zatsarynna, Olga and Gall, Juergen},
    title     = {{SyncVP}: Joint Diffusion for Synchronous Multi-Modal Video Prediction},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {13787--13797},
}
HUSH: Holistic Panoramic 3D Scene Understanding using Spherical Harmonics: Jongsung Lee,

Harin Park,

Byeong-Uk Lee,

Kyungdon Joo; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_CVPR,
    author    = {Lee, Jongsung and Park, Harin and Lee, Byeong-Uk and Joo, Kyungdon},
    title     = {{HUSH}: Holistic Panoramic {3D} Scene Understanding using Spherical Harmonics},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {16599--16608},
}
SkillMimic: Learning Basketball Interaction Skills from Demonstrations: Yinhuai Wang,

Qihan Zhao,

Runyi Yu,

Hok Wai Tsui,

Ailing Zeng,

Jing Lin,

Zhengyi Luo,

Jiwen Yu,

Xiu Li,

Qifeng Chen,

Jian Zhang,

Lei Zhang,

Ping Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Yinhuai and Zhao, Qihan and Yu, Runyi and Tsui, Hok Wai and Zeng, Ailing and Lin, Jing and Luo, Zhengyi and Yu, Jiwen and Li, Xiu and Chen, Qifeng and Zhang, Jian and Zhang, Lei and Tan, Ping},
    title     = {{SkillMimic}: Learning Basketball Interaction Skills from Demonstrations},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {17540--17549},
}
VISTREAM: Improving Computation Efficiency of Visual Streaming Perception via Law-of-Charge-Conservation Inspired Spiking Neural Network: Kang You,

Ziling Wei,

Jing Yan,

Boning Zhang,

Qinghai Guo,

Yaoyu Zhang,

Zhezhi He; [pdf] [supp]
[bibtex]
@InProceedings{You_2025_CVPR,
    author    = {You, Kang and Wei, Ziling and Yan, Jing and Zhang, Boning and Guo, Qinghai and Zhang, Yaoyu and He, Zhezhi},
    title     = {{VISTREAM}: Improving Computation Efficiency of Visual Streaming Perception via Law-of-Charge-Conservation Inspired Spiking Neural Network},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8796--8805},
}
STPro: Spatial and Temporal Progressive Learning for Weakly Supervised Spatio-Temporal Grounding: Aaryan Garg,

Akash Kumar,

Yogesh S Rawat; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Garg_2025_CVPR,
    author    = {Garg, Aaryan and Kumar, Akash and Rawat, Yogesh S},
    title     = {{STPro}: Spatial and Temporal Progressive Learning for Weakly Supervised Spatio-Temporal Grounding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3384--3394},
}
RGBAvatar: Reduced Gaussian Blendshapes for Online Modeling of Head Avatars: Linzhou Li,

Yumeng Li,

Yanlin Weng,

Youyi Zheng,

Kun Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Linzhou and Li, Yumeng and Weng, Yanlin and Zheng, Youyi and Zhou, Kun},
    title     = {{RGBAvatar}: Reduced {Gaussian} Blendshapes for Online Modeling of Head Avatars},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {10747--10757},
}
Rashomon Sets for Prototypical-Part Networks: Editing Interpretable Models in Real-Time: Jon Donnelly,

Zhicheng Guo,

Alina Jade Barnett,

Hayden McTavish,

Chaofan Chen,

Cynthia Rudin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Donnelly_2025_CVPR,
    author    = {Donnelly, Jon and Guo, Zhicheng and Barnett, Alina Jade and McTavish, Hayden and Chen, Chaofan and Rudin, Cynthia},
    title     = {{Rashomon} Sets for Prototypical-Part Networks: Editing Interpretable Models in Real-Time},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {4528--4538},
}
EEE-Bench: A Comprehensive Multimodal Electrical And Electronics Engineering Benchmark: Ming Li,

Jike Zhong,

Tianle Chen,

Yuxiang Lai,

Konstantinos Psounis; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Ming and Zhong, Jike and Chen, Tianle and Lai, Yuxiang and Psounis, Konstantinos},
    title     = {{EEE-Bench}: A Comprehensive Multimodal Electrical And Electronics Engineering Benchmark},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {13337--13349},
}
Text-Driven Fashion Image Editing with Compositional Concept Learning and Counterfactual Abduction: Shanshan Huang,

Haoxuan Li,

Chunyuan Zheng,

Mingyuan Ge,

Wei Gao,

Lei Wang,

Li Liu; [pdf]
[bibtex]
@InProceedings{Huang_2025_CVPR,
    author    = {Huang, Shanshan and Li, Haoxuan and Zheng, Chunyuan and Ge, Mingyuan and Gao, Wei and Wang, Lei and Liu, Li},
    title     = {Text-Driven Fashion Image Editing with Compositional Concept Learning and Counterfactual Abduction},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {28726--28735},
}
EnvPoser: Environment-aware Realistic Human Motion Estimation from Sparse Observations with Uncertainty Modeling: Songpengcheng Xia,

Yu Zhang,

Zhuo Su,

Xiaozheng Zheng,

Zheng Lv,

Guidong Wang,

Yongjie Zhang,

Qi Wu,

Lei Chu,

Ling Pei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_CVPR,
    author    = {Xia, Songpengcheng and Zhang, Yu and Su, Zhuo and Zheng, Xiaozheng and Lv, Zheng and Wang, Guidong and Zhang, Yongjie and Wu, Qi and Chu, Lei and Pei, Ling},
    title     = {{EnvPoser}: Environment-aware Realistic Human Motion Estimation from Sparse Observations with Uncertainty Modeling},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1839--1849},
}
A Unified Framework for Heterogeneous Semi-supervised Learning: Marzi Heidari,

Abdullah Alchihabi,

Hao Yan,

Yuhong Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Heidari_2025_CVPR,
    author    = {Heidari, Marzi and Alchihabi, Abdullah and Yan, Hao and Guo, Yuhong},
    title     = {A Unified Framework for Heterogeneous Semi-supervised Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {15371--15380},
}
Adapting Text-to-Image Generation with Feature Difference Instruction for Generic Image Restoration: Chao Wang,

Hehe Fan,

Huichen Yang,

Sarvnaz Karimi,

Lina Yao,

Yi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Chao and Fan, Hehe and Yang, Huichen and Karimi, Sarvnaz and Yao, Lina and Yang, Yi},
    title     = {Adapting Text-to-Image Generation with Feature Difference Instruction for Generic Image Restoration},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {23539--23550},
}
ReCon: Enhancing True Correspondence Discrimination through Relation Consistency for Robust Noisy Correspondence Learning: Quanxing Zha,

Xin Liu,

Shu-Juan Peng,

Yiu-ming Cheung,

Xing Xu,

Nannan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zha_2025_CVPR,
    author    = {Zha, Quanxing and Liu, Xin and Peng, Shu-Juan and Cheung, Yiu-ming and Xu, Xing and Wang, Nannan},
    title     = {{ReCon}: Enhancing True Correspondence Discrimination through Relation Consistency for Robust Noisy Correspondence Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {29680--29689},
}
Free360: Layered Gaussian Splatting for Unbounded 360-Degree View Synthesis from Extremely Sparse and Unposed Views: Chong Bao,

Xiyu Zhang,

Zehao Yu,

Jiale Shi,

Guofeng Zhang,

Songyou Peng,

Zhaopeng Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bao_2025_CVPR,
    author    = {Bao, Chong and Zhang, Xiyu and Yu, Zehao and Shi, Jiale and Zhang, Guofeng and Peng, Songyou and Cui, Zhaopeng},
    title     = {{Free360}: Layered {Gaussian} Splatting for Unbounded 360-Degree View Synthesis from Extremely Sparse and Unposed Views},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {16377--16387},
}
Tuning the Frequencies: Robust Training for Sinusoidal Neural Networks: Tiago Novello,

Diana Aldana,

Andre Araujo,

Luiz Velho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Novello_2025_CVPR,
    author    = {Novello, Tiago and Aldana, Diana and Araujo, Andre and Velho, Luiz},
    title     = {Tuning the Frequencies: Robust Training for Sinusoidal Neural Networks},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3071--3080},
}
Preconditioners for the Stochastic Training of Neural Fields: Shin-Fang Chng,

Hemanth Saratchandran,

Simon Lucey; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chng_2025_CVPR,
    author    = {Chng, Shin-Fang and Saratchandran, Hemanth and Lucey, Simon},
    title     = {Preconditioners for the Stochastic Training of Neural Fields},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {27222--27232},
}
Open Ad-hoc Categorization with Contextualized Feature Learning: Zilin Wang,

Sangwoo Mo,

Stella X. Yu,

Sima Behpour,

Liu Ren; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Zilin and Mo, Sangwoo and Yu, Stella X. and Behpour, Sima and Ren, Liu},
    title     = {Open Ad-hoc Categorization with Contextualized Feature Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {15108--15117},
}
Real-time Free-view Human Rendering from Sparse-view RGB Videos using Double Unprojected Textures: Guoxing Sun,

Rishabh Dabral,

Heming Zhu,

Pascal Fua,

Christian Theobalt,

Marc Habermann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
    author    = {Sun, Guoxing and Dabral, Rishabh and Zhu, Heming and Fua, Pascal and Theobalt, Christian and Habermann, Marc},
    title     = {Real-time Free-view Human Rendering from Sparse-view {RGB} Videos using Double Unprojected Textures},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {562--573},
}
ECBench: Can Multi-modal Foundation Models Understand the Egocentric World? A Holistic Embodied Cognition Benchmark: Ronghao Dang,

Yuqian Yuan,

Wenqi Zhang,

Yifei Xin,

Boqiang Zhang,

Long Li,

Liuyi Wang,

Qinyang Zeng,

Xin Li,

Lidong Bing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dang_2025_CVPR,
    author    = {Dang, Ronghao and Yuan, Yuqian and Zhang, Wenqi and Xin, Yifei and Zhang, Boqiang and Li, Long and Wang, Liuyi and Zeng, Qinyang and Li, Xin and Bing, Lidong},
    title     = {{ECBench}: Can Multi-modal Foundation Models Understand the Egocentric World? A Holistic Embodied Cognition Benchmark},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {24593--24602},
}
SfM-Free 3D Gaussian Splatting via Hierarchical Training: Bo Ji,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2025_CVPR,
    author    = {Ji, Bo and Yao, Angela},
    title     = {{SfM}-Free {3D} {Gaussian} Splatting via Hierarchical Training},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {21654--21663},
}
Large Self-Supervised Models Bridge the Gap in Domain Adaptive Object Detection: Marc-Antoine Lavoie,

Anas Mahmoud,

Steven L. Waslander; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lavoie_2025_CVPR,
    author    = {Lavoie, Marc-Antoine and Mahmoud, Anas and Waslander, Steven L.},
    title     = {Large Self-Supervised Models Bridge the Gap in Domain Adaptive Object Detection},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {4692--4702},
}
Dynamic Updates for Language Adaptation in Visual-Language Tracking: Xiaohai Li,

Bineng Zhong,

Qihua Liang,

Zhiyi Mo,

Jian Nong,

Shuxiang Song; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Xiaohai and Zhong, Bineng and Liang, Qihua and Mo, Zhiyi and Nong, Jian and Song, Shuxiang},
    title     = {Dynamic Updates for Language Adaptation in Visual-Language Tracking},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {19165--19174},
}
Multi-focal Conditioned Latent Diffusion for Person Image Synthesis: Jiaqi Liu,

Jichao Zhang,

Paolo Rota,

Nicu Sebe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Jiaqi and Zhang, Jichao and Rota, Paolo and Sebe, Nicu},
    title     = {Multi-focal Conditioned Latent Diffusion for Person Image Synthesis},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {16019--16028},
}
Uncertainty Meets Diversity: A Comprehensive Active Learning Framework for Indoor 3D Object Detection: Jiangyi Wang,

Na Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Jiangyi and Zhao, Na},
    title     = {Uncertainty Meets Diversity: A Comprehensive Active Learning Framework for Indoor {3D} Object Detection},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {20329--20339},
}
CASAGPT: Cuboid Arrangement and Scene Assembly for Interior Design: Weitao Feng,

Hang Zhou,

Jing Liao,

Li Cheng,

Wenbo Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_CVPR,
    author    = {Feng, Weitao and Zhou, Hang and Liao, Jing and Cheng, Li and Zhou, Wenbo},
    title     = {{CASAGPT}: Cuboid Arrangement and Scene Assembly for Interior Design},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {29173--29182},
}
Identity-Clothing Similarity Modeling for Unsupervised Clothing Change Person Re-Identification: Zhiqi Pang,

Junjie Wang,

Lingling Zhao,

Chunyu Wang; [pdf]
[bibtex]
@InProceedings{Pang_2025_CVPR,
    author    = {Pang, Zhiqi and Wang, Junjie and Zhao, Lingling and Wang, Chunyu},
    title     = {Identity-Clothing Similarity Modeling for Unsupervised Clothing Change Person Re-Identification},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {19251--19260},
}
Evaluating Model Perception of Color Illusions in Photorealistic Scenes: Lingjun Mao,

Zineng Tang,

Alane Suhr; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2025_CVPR,
    author    = {Mao, Lingjun and Tang, Zineng and Suhr, Alane},
    title     = {Evaluating Model Perception of Color Illusions in Photorealistic Scenes},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {7805--7814},
}
MINIMA: Modality Invariant Image Matching: Jiangwei Ren,

Xingyu Jiang,

Zizhuo Li,

Dingkang Liang,

Xin Zhou,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_CVPR,
    author    = {Ren, Jiangwei and Jiang, Xingyu and Li, Zizhuo and Liang, Dingkang and Zhou, Xin and Bai, Xiang},
    title     = {{MINIMA}: Modality Invariant Image Matching},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {23059--23068},
}
OccMamba: Semantic Occupancy Prediction with State Space Models: Heng Li,

Yuenan Hou,

Xiaohan Xing,

Yuexin Ma,

Xiao Sun,

Yanyong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Heng and Hou, Yuenan and Xing, Xiaohan and Ma, Yuexin and Sun, Xiao and Zhang, Yanyong},
    title     = {{OccMamba}: Semantic Occupancy Prediction with State Space Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {11949--11959},
}
3D Convex Splatting: Radiance Field Rendering with 3D Smooth Convexes: Jan Held,

Renaud Vandeghen,

Abdullah Hamdi,

Adrien Deliege,

Anthony Cioppa,

Silvio Giancola,

Andrea Vedaldi,

Bernard Ghanem,

Marc Van Droogenbroeck; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Held_2025_CVPR,
    author    = {Held, Jan and Vandeghen, Renaud and Hamdi, Abdullah and Deliege, Adrien and Cioppa, Anthony and Giancola, Silvio and Vedaldi, Andrea and Ghanem, Bernard and Van Droogenbroeck, Marc},
    title     = {{3D} Convex Splatting: Radiance Field Rendering with {3D} Smooth Convexes},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {21360--21369},
}
3D Prior Is All You Need: Cross-Task Few-shot 2D Gaze Estimation: Yihua Cheng,

Hengfei Wang,

Zhongqun Zhang,

Yang Yue,

Boeun Kim,

Feng Lu,

Hyung Jin Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
    author    = {Cheng, Yihua and Wang, Hengfei and Zhang, Zhongqun and Yue, Yang and Kim, Boeun and Lu, Feng and Chang, Hyung Jin},
    title     = {{3D} Prior Is All You Need: Cross-Task Few-shot {2D} Gaze Estimation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {23891--23900},
}
Cheb-GR: Rethinking K-nearest Neighbor Search in Re-ranking for Person Re-identification: Jinxi Yang,

He Li,

Bo Du,

Mang Ye; [pdf]
[bibtex]
@InProceedings{Yang_2025_CVPR,
    author    = {Yang, Jinxi and Li, He and Du, Bo and Ye, Mang},
    title     = {{Cheb-GR}: Rethinking K-nearest Neighbor Search in Re-ranking for Person Re-identification},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {19261--19270},
}
Spotting the Unexpected (STU): A 3D LiDAR Dataset for Anomaly Segmentation in Autonomous Driving: Alexey Nekrasov,

Malcolm Burdorf,

Stewart Worrall,

Bastian Leibe,

Julie Stephany Berrio Perez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nekrasov_2025_CVPR,
    author    = {Nekrasov, Alexey and Burdorf, Malcolm and Worrall, Stewart and Leibe, Bastian and Perez, Julie Stephany Berrio},
    title     = {Spotting the Unexpected ({STU}): A {3D} {LiDAR} Dataset for Anomaly Segmentation in Autonomous Driving},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {11875--11885},
}
Is `Right' Right? Enhancing Object Orientation Understanding in Multimodal Large Language Models through Egocentric Instruction Tuning: Ji Hyeok Jung,

Eun Tae Kim,

Seoyeon Kim,

Joo Ho Lee,

Bumsoo Kim,

Buru Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2025_CVPR,
    author    = {Jung, Ji Hyeok and Kim, Eun Tae and Kim, Seoyeon and Lee, Joo Ho and Kim, Bumsoo and Chang, Buru},
    title     = {Is `Right' Right? Enhancing Object Orientation Understanding in Multimodal Large Language Models through Egocentric Instruction Tuning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {14257--14267},
}
GCC: Generative Color Constancy via Diffusing a Color Checker: Chen-Wei Chang,

Cheng-De Fan,

Chia-Che Chang,

Yi-Chen Lo,

Yu-Chee Tseng,

Jiun-Long Huang,

Yu-Lun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chang_2025_CVPR,
    author    = {Chang, Chen-Wei and Fan, Cheng-De and Chang, Chia-Che and Lo, Yi-Chen and Tseng, Yu-Chee and Huang, Jiun-Long and Liu, Yu-Lun},
    title     = {{GCC}: Generative Color Constancy via Diffusing a Color Checker},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {10868--10878},
}
Do Visual Imaginations Improve Vision-and-Language Navigation Agents?: Akhil Perincherry,

Jacob Krantz,

Stefan Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Perincherry_2025_CVPR,
    author    = {Perincherry, Akhil and Krantz, Jacob and Lee, Stefan},
    title     = {Do Visual Imaginations Improve Vision-and-Language Navigation Agents?},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3846--3855},
}
On Denoising Walking Videos for Gait Recognition: Dongyang Jin,

Chao Fan,

Jingzhe Ma,

Jingkai Zhou,

Weihua Chen,

Shiqi Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2025_CVPR,
    author    = {Jin, Dongyang and Fan, Chao and Ma, Jingzhe and Zhou, Jingkai and Chen, Weihua and Yu, Shiqi},
    title     = {On Denoising Walking Videos for Gait Recognition},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {12347--12357},
}
Conformal Prediction for Zero-Shot Models: Julio Silva-Rodríguez,

Ismail Ben Ayed,

Jose Dolz; [pdf] [supp]
[bibtex]
@InProceedings{Silva-Rodriguez_2025_CVPR,
    author    = {Silva-Rodr{\'\i}guez, Julio and Ben Ayed, Ismail and Dolz, Jose},
    title     = {Conformal Prediction for Zero-Shot Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {19931--19941},
}
PhysAnimator: Physics-Guided Generative Cartoon Animation: Tianyi Xie,

Yiwei Zhao,

Ying Jiang,

Chenfanfu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
    author    = {Xie, Tianyi and Zhao, Yiwei and Jiang, Ying and Jiang, Chenfanfu},
    title     = {{PhysAnimator}: Physics-Guided Generative Cartoon Animation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {10793--10804},
}
SeriesBench: A Benchmark for Narrative-Driven Drama Series Understanding: Chenkai Zhang,

Yiming Lei,

Zeming Liu,

Haitao Leng,

ShaoGuo Liu,

Tingting Gao,

Qingjie Liu,

Yunhong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Chenkai and Lei, Yiming and Liu, Zeming and Leng, Haitao and Liu, ShaoGuo and Gao, Tingting and Liu, Qingjie and Wang, Yunhong},
    title     = {{SeriesBench}: A Benchmark for Narrative-Driven Drama Series Understanding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {28995--29004},
}
Weakly Supervised Temporal Action Localization via Dual-Prior Collaborative Learning Guided by Multimodal Large Language Models: Quan Zhang,

Jinwei Fang,

Rui Yuan,

Xi Tang,

Yuxin Qi,

Ke Zhang,

Chun Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Quan and Fang, Jinwei and Yuan, Rui and Tang, Xi and Qi, Yuxin and Zhang, Ke and Yuan, Chun},
    title     = {Weakly Supervised Temporal Action Localization via Dual-Prior Collaborative Learning Guided by Multimodal Large Language Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {24139--24148},
}
FIMA-Q: Post-Training Quantization for Vision Transformers by Fisher Information Matrix Approximation: Zhuguanyu Wu,

Shihe Wang,

Jiayi Zhang,

Jiaxin Chen,

Yunhong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
    author    = {Wu, Zhuguanyu and Wang, Shihe and Zhang, Jiayi and Chen, Jiaxin and Wang, Yunhong},
    title     = {{FIMA-Q}: Post-Training Quantization for Vision Transformers by {Fisher} Information Matrix Approximation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {14891--14900},
}
HotSpot: Signed Distance Function Optimization with an Asymptotically Sufficient Condition: Zimo Wang,

Cheng Wang,

Taiki Yoshino,

Sirui Tao,

Ziyang Fu,

Tzu-Mao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Zimo and Wang, Cheng and Yoshino, Taiki and Tao, Sirui and Fu, Ziyang and Li, Tzu-Mao},
    title     = {{HotSpot}: Signed Distance Function Optimization with an Asymptotically Sufficient Condition},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1276--1286},
}
GliaNet: Adaptive Neural Network Structure Learning with Glia-Driven: Mengqiao Han,

Liyuan Pan,

Xiabi Liu; [pdf] [supp]
[bibtex]
@InProceedings{Han_2025_CVPR,
    author    = {Han, Mengqiao and Pan, Liyuan and Liu, Xiabi},
    title     = {{GliaNet}: Adaptive Neural Network Structure Learning with Glia-Driven},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {25240--25249},
}
BACON: Improving Clarity of Image Captions via Bag-of-Concept Graphs: Zhantao Yang,

Ruili Feng,

Keyu Yan,

Huangji Wang,

Zhicai Wang,

Shangwen Zhu,

Han Zhang,

Jie Xiao,

Pingyu Wu,

Kai Zhu,

Jixuan Chen,

Chen-Wei Xie,

Yue Yang,

Hongyang Zhang,

Yu Liu,

Fan Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
    author    = {Yang, Zhantao and Feng, Ruili and Yan, Keyu and Wang, Huangji and Wang, Zhicai and Zhu, Shangwen and Zhang, Han and Xiao, Jie and Wu, Pingyu and Zhu, Kai and Chen, Jixuan and Xie, Chen-Wei and Yang, Yue and Zhang, Hongyang and Liu, Yu and Cheng, Fan},
    title     = {{BACON}: Improving Clarity of Image Captions via Bag-of-Concept Graphs},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {14380--14389},
}
EntitySAM: Segment Everything in Video: Mingqiao Ye,

Seoung Wug Oh,

Lei Ke,

Joon-Young Lee; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2025_CVPR, author = {Ye, Mingqiao and Oh, Seoung Wug and Ke, Lei and Lee, Joon-Young}, title = {{EntitySAM}: Segment Everything in Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24234--24243} }
GS-2DGS: Geometrically Supervised 2DGS for Reflective Object Reconstruction: Jinguang Tong,

Xuesong Li,

Fahira Afzal Maken,

Sundaram Muthu,

Lars Petersson,

Chuong Nguyen,

Hongdong Li; [pdf] [supp]
[bibtex]
@InProceedings{Tong_2025_CVPR, author = {Tong, Jinguang and Li, Xuesong and Maken, Fahira Afzal and Muthu, Sundaram and Petersson, Lars and Nguyen, Chuong and Li, Hongdong}, title = {{GS-2DGS}: Geometrically Supervised {2DGS} for Reflective Object Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21547--21557} }
Libra-Merging: Importance-redundancy and Pruning-merging Trade-off for Acceleration Plug-in in Large Vision-Language Model: Longrong Yang,

Dong Shen,

Chaoxiang Cai,

Kaibing Chen,

Fan Yang,

Tingting Gao,

Di Zhang,

Xi Li; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR, author = {Yang, Longrong and Shen, Dong and Cai, Chaoxiang and Chen, Kaibing and Yang, Fan and Gao, Tingting and Zhang, Di and Li, Xi}, title = {{Libra-Merging}: Importance-redundancy and Pruning-merging Trade-off for Acceleration Plug-in in Large Vision-Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9402--9412} }
VasTSD: Learning 3D Vascular Tree-state Space Diffusion Model for Angiography Synthesis: Zhifeng Wang,

Renjiao Yi,

Xin Wen,

Chenyang Zhu,

Kai Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Zhifeng and Yi, Renjiao and Wen, Xin and Zhu, Chenyang and Xu, Kai}, title = {{VasTSD}: Learning {3D} Vascular Tree-state Space Diffusion Model for Angiography Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15693--15702} }
PanSplat: 4K Panorama Synthesis with Feed-Forward Gaussian Splatting: Cheng Zhang,

Haofei Xu,

Qianyi Wu,

Camilo Cruz Gambardella,

Dinh Phung,

Jianfei Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Cheng and Xu, Haofei and Wu, Qianyi and Gambardella, Camilo Cruz and Phung, Dinh and Cai, Jianfei}, title = {{PanSplat}: {4K} Panorama Synthesis with Feed-Forward {Gaussian} Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11437--11447} }
WISNet: Pseudo Label Generation on Unbalanced and Patch Annotated Waste Images: Shifan Zhang,

Hongzi Zhu,

Yinan He,

Minyi Guo,

Ziyang Lou,

Shan Chang; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Shifan and Zhu, Hongzi and He, Yinan and Guo, Minyi and Lou, Ziyang and Chang, Shan}, title = {{WISNet}: Pseudo Label Generation on Unbalanced and Patch Annotated Waste Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15076--15085} }
MixerMDM: Learnable Composition of Human Motion Diffusion Models: Pablo Ruiz-Ponce,

German Barquero,

Cristina Palmero,

Sergio Escalera,

José García-Rodríguez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ruiz-Ponce_2025_CVPR, author = {Ruiz-Ponce, Pablo and Barquero, German and Palmero, Cristina and Escalera, Sergio and Garc{\'\i}a-Rodr{\'\i}guez, Jos{\'e}}, title = {{MixerMDM}: Learnable Composition of Human Motion Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12380--12390} }
Hand-held Object Reconstruction from RGB Video with Dynamic Interaction: Shijian Jiang,

Qi Ye,

Rengan Xie,

Yuchi Huo,

Jiming Chen; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_CVPR, author = {Jiang, Shijian and Ye, Qi and Xie, Rengan and Huo, Yuchi and Chen, Jiming}, title = {Hand-held Object Reconstruction from {RGB} Video with Dynamic Interaction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12220--12230} }
LEDiff: Latent Exposure Diffusion for HDR Generation: Chao Wang,

Zhihao Xia,

Thomas Leimkuhler,

Karol Myszkowski,

Xuaner Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Chao and Xia, Zhihao and Leimkuhler, Thomas and Myszkowski, Karol and Zhang, Xuaner}, title = {{LEDiff}: Latent Exposure Diffusion for {HDR} Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {453--464} }
Video Depth Anything: Consistent Depth Estimation for Super-Long Videos: Sili Chen,

Hengkai Guo,

Shengnan Zhu,

Feihu Zhang,

Zilong Huang,

Jiashi Feng,

Bingyi Kang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR, author = {Chen, Sili and Guo, Hengkai and Zhu, Shengnan and Zhang, Feihu and Huang, Zilong and Feng, Jiashi and Kang, Bingyi}, title = {{Video Depth Anything}: Consistent Depth Estimation for Super-Long Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22831--22840} }
VideoAutoArena: An Automated Arena for Evaluating Large Multimodal Models in Video Analysis through User Simulation: Ziyang Luo,

Haoning Wu,

Dongxu Li,

Jing Ma,

Mohan Kankanhalli,

Junnan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_CVPR, author = {Luo, Ziyang and Wu, Haoning and Li, Dongxu and Ma, Jing and Kankanhalli, Mohan and Li, Junnan}, title = {{VideoAutoArena}: An Automated Arena for Evaluating Large Multimodal Models in Video Analysis through User Simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8461--8474} }
InstanceCap: Improving Text-to-Video Generation via Instance-aware Structured Caption: Tiehan Fan,

Kepan Nan,

Rui Xie,

Penghao Zhou,

Zhenheng Yang,

Chaoyou Fu,

Xiang Li,

Jian Yang,

Ying Tai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_CVPR, author = {Fan, Tiehan and Nan, Kepan and Xie, Rui and Zhou, Penghao and Yang, Zhenheng and Fu, Chaoyou and Li, Xiang and Yang, Jian and Tai, Ying}, title = {{InstanceCap}: Improving Text-to-Video Generation via Instance-aware Structured Caption}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28974--28983} }
AudCast: Audio-Driven Human Video Generation by Cascaded Diffusion Transformers: Jiazhi Guan,

Kaisiyuan Wang,

Zhiliang Xu,

Quanwei Yang,

Yasheng Sun,

Shengyi He,

Borong Liang,

Yukang Cao,

Yingying Li,

Haocheng Feng,

Errui Ding,

Jingdong Wang,

Youjian Zhao,

Hang Zhou,

Ziwei Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Guan_2025_CVPR, author = {Guan, Jiazhi and Wang, Kaisiyuan and Xu, Zhiliang and Yang, Quanwei and Sun, Yasheng and He, Shengyi and Liang, Borong and Cao, Yukang and Li, Yingying and Feng, Haocheng and Ding, Errui and Wang, Jingdong and Zhao, Youjian and Zhou, Hang and Liu, Ziwei}, title = {{AudCast}: Audio-Driven Human Video Generation by Cascaded Diffusion Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10678--10689} }
Luminance-GS: Adapting 3D Gaussian Splatting to Challenging Lighting Conditions with View-Adaptive Curve Adjustment: Ziteng Cui,

Xuangeng Chu,

Tatsuya Harada; [pdf] [supp]
[bibtex]
@InProceedings{Cui_2025_CVPR, author = {Cui, Ziteng and Chu, Xuangeng and Harada, Tatsuya}, title = {{Luminance-GS}: Adapting {3D} {Gaussian} Splatting to Challenging Lighting Conditions with View-Adaptive Curve Adjustment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26472--26482} }
EventSplat: 3D Gaussian Splatting from Moving Event Cameras for Real-time Rendering: Toshiya Yura,

Ashkan Mirzaei,

Igor Gilitschenski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yura_2025_CVPR, author = {Yura, Toshiya and Mirzaei, Ashkan and Gilitschenski, Igor}, title = {{EventSplat}: {3D} {Gaussian} Splatting from Moving Event Cameras for Real-time Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26876--26886} }
Thinking in Space: How Multimodal Large Language Models See, Remember, and Recall Spaces: Jihan Yang,

Shusheng Yang,

Anjali W. Gupta,

Rilyn Han,

Li Fei-Fei,

Saining Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR, author = {Yang, Jihan and Yang, Shusheng and Gupta, Anjali W. and Han, Rilyn and Fei-Fei, Li and Xie, Saining}, title = {Thinking in Space: How Multimodal Large Language Models See, Remember, and Recall Spaces}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10632--10643} }
3D Student Splatting and Scooping: Jialin Zhu,

Jiangbei Yue,

Feixiang He,

He Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR, author = {Zhu, Jialin and Yue, Jiangbei and He, Feixiang and Wang, He}, title = {{3D} {Student} Splatting and Scooping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21045--21054} }
World-consistent Video Diffusion with Explicit 3D Modeling: Qihang Zhang,

Shuangfei Zhai,

Miguel Ángel Bautista Martin,

Kevin Miao,

Alexander Toshev,

Joshua Susskind,

Jiatao Gu; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Qihang and Zhai, Shuangfei and Martin, Miguel {\'A}ngel Bautista and Miao, Kevin and Toshev, Alexander and Susskind, Joshua and Gu, Jiatao}, title = {World-consistent Video Diffusion with Explicit {3D} Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21685--21695} }
A Stitch in Time Saves Nine: Small VLM is a Precise Guidance for Accelerating Large VLMs: Wangbo Zhao,

Yizeng Han,

Jiasheng Tang,

Zhikai Li,

Yibing Song,

Kai Wang,

Zhangyang Wang,

Yang You; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR, author = {Zhao, Wangbo and Han, Yizeng and Tang, Jiasheng and Li, Zhikai and Song, Yibing and Wang, Kai and Wang, Zhangyang and You, Yang}, title = {A Stitch in Time Saves Nine: Small {VLM} is a Precise Guidance for Accelerating Large {VLMs}}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19814--19824} }
Zero-Shot Novel View and Depth Synthesis with Multi-View Geometric Diffusion: Vitor Guizilini,

Muhammad Zubair Irshad,

Dian Chen,

Greg Shakhnarovich,

Rares Ambrus; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guizilini_2025_CVPR, author = {Guizilini, Vitor and Irshad, Muhammad Zubair and Chen, Dian and Shakhnarovich, Greg and Ambrus, Rares}, title = {Zero-Shot Novel View and Depth Synthesis with Multi-View Geometric Diffusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {764--776} }
Unveiling Visual Perception in Language Models: An Attention Head Analysis Approach: Jing Bi,

Junjia Guo,

Yunlong Tang,

Lianggong Bruce Wen,

Zhang Liu,

Bingjie Wang,

Chenliang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bi_2025_CVPR, author = {Bi, Jing and Guo, Junjia and Tang, Yunlong and Wen, Lianggong Bruce and Liu, Zhang and Wang, Bingjie and Xu, Chenliang}, title = {Unveiling Visual Perception in Language Models: An Attention Head Analysis Approach}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4135--4144} }
SemanticDraw: Towards Real-Time Interactive Content Creation from Image Diffusion Models: Jaerin Lee,

Daniel Sungho Jung,

Kanggeon Lee,

Kyoung Mu Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR, author = {Lee, Jaerin and Jung, Daniel Sungho and Lee, Kanggeon and Lee, Kyoung Mu}, title = {{SemanticDraw}: Towards Real-Time Interactive Content Creation from Image Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13021--13030} }
SemiDAViL: Semi-supervised Domain Adaptation with Vision-Language Guidance for Semantic Segmentation: Hritam Basak,

Zhaozheng Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Basak_2025_CVPR, author = {Basak, Hritam and Yin, Zhaozheng}, title = {{SemiDAViL}: Semi-supervised Domain Adaptation with Vision-Language Guidance for Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9816--9828} }
Learning Partonomic 3D Reconstruction from Image Collections: Xiaoqian Ruan,

Pei Yu,

Dian Jia,

Hyeonjeong Park,

Peixi Xiong,

Wei Tang; [pdf] [supp]
[bibtex]
@InProceedings{Ruan_2025_CVPR, author = {Ruan, Xiaoqian and Yu, Pei and Jia, Dian and Park, Hyeonjeong and Xiong, Peixi and Tang, Wei}, title = {Learning Partonomic {3D} Reconstruction from Image Collections}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26734--26744} }
Arc2Avatar: Generating Expressive 3D Avatars from a Single Image via ID Guidance: Dimitrios Gerogiannis,

Foivos Paraperas Papantoniou,

Rolandos Alexandros Potamias,

Alexandros Lattas,

Stefanos Zafeiriou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gerogiannis_2025_CVPR, author = {Gerogiannis, Dimitrios and Papantoniou, Foivos Paraperas and Potamias, Rolandos Alexandros and Lattas, Alexandros and Zafeiriou, Stefanos}, title = {{Arc2Avatar}: Generating Expressive {3D} Avatars from a Single Image via {ID} Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10770--10782} }
Seeing Speech and Sound: Distinguishing and Locating Audio Sources in Visual Scenes: Hyeonggon Ryu,

Seongyu Kim,

Joon Son Chung,

Arda Senocak; [pdf] [supp]
[bibtex]
@InProceedings{Ryu_2025_CVPR, author = {Ryu, Hyeonggon and Kim, Seongyu and Chung, Joon Son and Senocak, Arda}, title = {Seeing Speech and Sound: Distinguishing and Locating Audio Sources in Visual Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13540--13549} }
Structure from Collision: Takuhiro Kaneko; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kaneko_2025_CVPR, author = {Kaneko, Takuhiro}, title = {Structure from Collision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16314--16324} }
ODA-GAN: Orthogonal Decoupling Alignment GAN Assisted by Weakly-supervised Learning for Virtual Immunohistochemistry Staining: Tong Wang,

Mingkang Wang,

Zhongze Wang,

Hongkai Wang,

Qi Xu,

Fengyu Cong,

Hongming Xu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Tong and Wang, Mingkang and Wang, Zhongze and Wang, Hongkai and Xu, Qi and Cong, Fengyu and Xu, Hongming}, title = {{ODA-GAN}: Orthogonal Decoupling Alignment {GAN} Assisted by Weakly-supervised Learning for Virtual Immunohistochemistry Staining}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25920--25929} }
EVOS: Efficient Implicit Neural Training via EVOlutionary Selector: Weixiang Zhang,

Shuzhao Xie,

Chengwei Ren,

Siyi Xie,

Chen Tang,

Shijia Ge,

Mingzi Wang,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Weixiang and Xie, Shuzhao and Ren, Chengwei and Xie, Siyi and Tang, Chen and Ge, Shijia and Wang, Mingzi and Wang, Zhi}, title = {{EVOS}: Efficient Implicit Neural Training via {EVOlutionary} Selector}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30472--30482} }
Crab: A Unified Audio-Visual Scene Understanding Model with Explicit Cooperation: Henghui Du,

Guangyao Li,

Chang Zhou,

Chunjie Zhang,

Alan Zhao,

Di Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2025_CVPR, author = {Du, Henghui and Li, Guangyao and Zhou, Chang and Zhang, Chunjie and Zhao, Alan and Hu, Di}, title = {Crab: A Unified Audio-Visual Scene Understanding Model with Explicit Cooperation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18804--18814} }
Nullu: Mitigating Object Hallucinations in Large Vision-Language Models via HalluSpace Projection: Le Yang,

Ziwei Zheng,

Boxu Chen,

Zhengyu Zhao,

Chenhao Lin,

Chao Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR, author = {Yang, Le and Zheng, Ziwei and Chen, Boxu and Zhao, Zhengyu and Lin, Chenhao and Shen, Chao}, title = {Nullu: Mitigating Object Hallucinations in Large Vision-Language Models via {HalluSpace} Projection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14635--14645} }
OralXrays-9: Towards Hospital-Scale Panoramic X-ray Anomaly Detection via Personalized Multi-Object Query-Aware Mining: Bingzhi Chen,

Sisi Fu,

Xiaocheng Fang,

Jieyi Cai,

Boya Zhang,

Minhua Lu,

Yishu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR, author = {Chen, Bingzhi and Fu, Sisi and Fang, Xiaocheng and Cai, Jieyi and Zhang, Boya and Lu, Minhua and Liu, Yishu}, title = {{OralXrays-9}: Towards Hospital-Scale Panoramic {X-ray} Anomaly Detection via Personalized Multi-Object Query-Aware Mining}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15570--15579} }
MEET: Towards Memory-Efficient Temporal Sparse Deep Neural Networks: Zeqi Zhu,

Ibrahim Batuhan Akkaya,

Luc Waeijen,

Egor Bondarev,

Arash Pourtaherian,

Orlando Moreira; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_CVPR, author = {Zhu, Zeqi and Akkaya, Ibrahim Batuhan and Waeijen, Luc and Bondarev, Egor and Pourtaherian, Arash and Moreira, Orlando}, title = {{MEET}: Towards Memory-Efficient Temporal Sparse Deep Neural Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29309--29320} }
SplatAD: Real-Time Lidar and Camera Rendering with 3D Gaussian Splatting for Autonomous Driving: Georg Hess,

Carl Lindström,

Maryam Fatemi,

Christoffer Petersson,

Lennart Svensson; [pdf] [supp]
[bibtex]
@InProceedings{Hess_2025_CVPR, author = {Hess, Georg and Lindstr{\"o}m, Carl and Fatemi, Maryam and Petersson, Christoffer and Svensson, Lennart}, title = {{SplatAD}: Real-Time Lidar and Camera Rendering with {3D} {Gaussian} Splatting for Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11982--11992} }
Audio-Visual Instance Segmentation: Ruohao Guo,

Xianghua Ying,

Yaru Chen,

Dantong Niu,

Guangyao Li,

Liao Qu,

Yanyu Qi,

Jinxing Zhou,

Bowei Xing,

Wenzhen Yue,

Ji Shi,

Qixun Wang,

Peiliang Zhang,

Buwen Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR, author = {Guo, Ruohao and Ying, Xianghua and Chen, Yaru and Niu, Dantong and Li, Guangyao and Qu, Liao and Qi, Yanyu and Zhou, Jinxing and Xing, Bowei and Yue, Wenzhen and Shi, Ji and Wang, Qixun and Zhang, Peiliang and Liang, Buwen}, title = {Audio-Visual Instance Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13550--13560} }
Probabilistic Prompt Distribution Learning for Animal Pose Estimation: Jiyong Rao,

Brian Nlong Zhao,

Yu Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Rao_2025_CVPR, author = {Rao, Jiyong and Zhao, Brian Nlong and Wang, Yu}, title = {Probabilistic Prompt Distribution Learning for Animal Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29438--29447} }
dFLMoE: Decentralized Federated Learning via Mixture of Experts for Medical Data Analysis: Luyuan Xie,

Tianyu Luan,

Wenyuan Cai,

Guochen Yan,

Zhaoyu Chen,

Nan Xi,

Yuejian Fang,

Qingni Shen,

Zhonghai Wu,

Junsong Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR, author = {Xie, Luyuan and Luan, Tianyu and Cai, Wenyuan and Yan, Guochen and Chen, Zhaoyu and Xi, Nan and Fang, Yuejian and Shen, Qingni and Wu, Zhonghai and Yuan, Junsong}, title = {{dFLMoE}: Decentralized Federated Learning via Mixture of Experts for Medical Data Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10203--10213} }
Reconstructing Humans with a Biomechanically Accurate Skeleton: Yan Xia,

Xiaowei Zhou,

Etienne Vouga,

Qixing Huang,

Georgios Pavlakos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_CVPR, author = {Xia, Yan and Zhou, Xiaowei and Vouga, Etienne and Huang, Qixing and Pavlakos, Georgios}, title = {Reconstructing Humans with a Biomechanically Accurate Skeleton}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5355--5365} }
AdaCM^2: On Understanding Extremely Long-Term Video with Adaptive Cross-Modality Memory Reduction: Yuanbin Man,

Ying Huang,

Chengming Zhang,

Bingzhe Li,

Wei Niu,

Miao Yin; [pdf]
[bibtex]
@InProceedings{Man_2025_CVPR, author = {Man, Yuanbin and Huang, Ying and Zhang, Chengming and Li, Bingzhe and Niu, Wei and Yin, Miao}, title = {{AdaCM{\textasciicircum}2}: On Understanding Extremely Long-Term Video with Adaptive Cross-Modality Memory Reduction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8534--8544} }
Mitigating Object Hallucinations in Large Vision-Language Models with Assembly of Global and Local Attention: Wenbin An,

Feng Tian,

Sicong Leng,

Jiahao Nie,

Haonan Lin,

Qianying Wang,

Ping Chen,

Xiaoqin Zhang,

Shijian Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2025_CVPR, author = {An, Wenbin and Tian, Feng and Leng, Sicong and Nie, Jiahao and Lin, Haonan and Wang, Qianying and Chen, Ping and Zhang, Xiaoqin and Lu, Shijian}, title = {Mitigating Object Hallucinations in Large Vision-Language Models with Assembly of Global and Local Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29915--29926} }
VGGT: Visual Geometry Grounded Transformer: Jianyuan Wang,

Minghao Chen,

Nikita Karaev,

Andrea Vedaldi,

Christian Rupprecht,

David Novotny; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Jianyuan and Chen, Minghao and Karaev, Nikita and Vedaldi, Andrea and Rupprecht, Christian and Novotny, David}, title = {{VGGT}: Visual Geometry Grounded Transformer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5294--5306} }
Silent Branding Attack: Trigger-free Data Poisoning Attack on Text-to-Image Diffusion Models: Sangwon Jang,

June Suk Choi,

Jaehyeong Jo,

Kimin Lee,

Sung Ju Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2025_CVPR, author = {Jang, Sangwon and Choi, June Suk and Jo, Jaehyeong and Lee, Kimin and Hwang, Sung Ju}, title = {Silent Branding Attack: Trigger-free Data Poisoning Attack on Text-to-Image Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8203--8212} }
UniSTD: Towards Unified Spatio-Temporal Learning across Diverse Disciplines: Chen Tang,

Xinzhu Ma,

Encheng Su,

Xiufeng Song,

Xiaohong Liu,

Wei-Hong Li,

Lei Bai,

Wanli Ouyang,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR, author = {Tang, Chen and Ma, Xinzhu and Su, Encheng and Song, Xiufeng and Liu, Xiaohong and Li, Wei-Hong and Bai, Lei and Ouyang, Wanli and Yue, Xiangyu}, title = {{UniSTD}: Towards Unified Spatio-Temporal Learning across Diverse Disciplines}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29213--29224} }
Visual Consensus Prompting for Co-Salient Object Detection: Jie Wang,

Nana Yu,

Zihao Zhang,

Yahong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Jie and Yu, Nana and Zhang, Zihao and Han, Yahong}, title = {Visual Consensus Prompting for Co-Salient Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9591--9600} }
Mani-GS: Gaussian Splatting Manipulation with Triangular Mesh: Xiangjun Gao,

Xiaoyu Li,

Yiyu Zhuang,

Qi Zhang,

Wenbo Hu,

Chaopeng Zhang,

Yao Yao,

Ying Shan,

Long Quan; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2025_CVPR, author = {Gao, Xiangjun and Li, Xiaoyu and Zhuang, Yiyu and Zhang, Qi and Hu, Wenbo and Zhang, Chaopeng and Yao, Yao and Shan, Ying and Quan, Long}, title = {{Mani-GS}: {Gaussian} Splatting Manipulation with Triangular Mesh}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21392--21402} }
UniHOPE: A Unified Approach for Hand-Only and Hand-Object Pose Estimation: Yinqiao Wang,

Hao Xu,

Pheng-Ann Heng,

Chi-Wing Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Yinqiao and Xu, Hao and Heng, Pheng-Ann and Fu, Chi-Wing}, title = {{UniHOPE}: A Unified Approach for Hand-Only and Hand-Object Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12231--12241} }
RL-RC-DoT: A Block-level RL agent for Task-Aware Video Compression: Uri Gadot,

Assaf Shocher,

Shie Mannor,

Gal Chechik,

Assaf Hallak; [pdf] [supp]
[bibtex]
@InProceedings{Gadot_2025_CVPR, author = {Gadot, Uri and Shocher, Assaf and Mannor, Shie and Chechik, Gal and Hallak, Assaf}, title = {{RL-RC-DoT}: A Block-level {RL} agent for Task-Aware Video Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12533--12542} }
Quantization without Tears: Minghao Fu,

Hao Yu,

Jie Shao,

Junjie Zhou,

Ke Zhu,

Jianxin Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_CVPR, author = {Fu, Minghao and Yu, Hao and Shao, Jie and Zhou, Junjie and Zhu, Ke and Wu, Jianxin}, title = {Quantization without Tears}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4462--4472} }
PHGC: Procedural Heterogeneous Graph Completion for Natural Language Task Verification in Egocentric Videos: Xun Jiang,

Zhiyi Huang,

Xing Xu,

Jingkuan Song,

Fumin Shen,

Heng Tao Shen; [pdf]
[bibtex]
@InProceedings{Jiang_2025_CVPR, author = {Jiang, Xun and Huang, Zhiyi and Xu, Xing and Song, Jingkuan and Shen, Fumin and Shen, Heng Tao}, title = {{PHGC}: Procedural Heterogeneous Graph Completion for Natural Language Task Verification in Egocentric Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8615--8624} }
Recognition-Synergistic Scene Text Editing: Zhengyao Fang,

Pengyuan Lyu,

Jingjing Wu,

Chengquan Zhang,

Jun Yu,

Guangming Lu,

Wenjie Pei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_CVPR, author = {Fang, Zhengyao and Lyu, Pengyuan and Wu, Jingjing and Zhang, Chengquan and Yu, Jun and Lu, Guangming and Pei, Wenjie}, title = {Recognition-Synergistic Scene Text Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13104--13113} }
Towards Consistent Multi-Task Learning: Unlocking the Potential of Task-Specific Parameters: Xiaohan Qin,

Xiaoxing Wang,

Junchi Yan; [pdf] [supp]
[bibtex]
@InProceedings{Qin_2025_CVPR, author = {Qin, Xiaohan and Wang, Xiaoxing and Yan, Junchi}, title = {Towards Consistent Multi-Task Learning: Unlocking the Potential of Task-Specific Parameters}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10067--10076} }
WildAvatar: Learning In-the-wild 3D Avatars from the Web: Zihao Huang,

Shoukang Hu,

Guangcong Wang,

Tianqi Liu,

Yuhang Zang,

Zhiguo Cao,

Wei Li,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR, author = {Huang, Zihao and Hu, Shoukang and Wang, Guangcong and Liu, Tianqi and Zang, Yuhang and Cao, Zhiguo and Li, Wei and Liu, Ziwei}, title = {{WildAvatar}: Learning In-the-wild {3D} Avatars from the Web}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15963--15975} }
BooW-VTON: Boosting In-the-Wild Virtual Try-On via Mask-Free Pseudo Data Training: Xuanpu Zhang,

Dan Song,

Pengxin Zhan,

Tianyu Chang,

Jianhao Zeng,

Qingguo Chen,

Weihua Luo,

An-An Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Xuanpu and Song, Dan and Zhan, Pengxin and Chang, Tianyu and Zeng, Jianhao and Chen, Qingguo and Luo, Weihua and Liu, An-An}, title = {{BooW-VTON}: Boosting In-the-Wild Virtual Try-On via Mask-Free Pseudo Data Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26399--26408} }
Rectified Diffusion Guidance for Conditional Generation: Mengfei Xia,

Nan Xue,

Yujun Shen,

Ran Yi,

Tieliang Gong,

Yong-Jin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_CVPR, author = {Xia, Mengfei and Xue, Nan and Shen, Yujun and Yi, Ran and Gong, Tieliang and Liu, Yong-Jin}, title = {Rectified Diffusion Guidance for Conditional Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13371--13380} }
SceneFactor: Factored Latent 3D Diffusion for Controllable 3D Scene Generation: Aleksey Bokhovkin,

Quan Meng,

Shubham Tulsiani,

Angela Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bokhovkin_2025_CVPR, author = {Bokhovkin, Aleksey and Meng, Quan and Tulsiani, Shubham and Dai, Angela}, title = {{SceneFactor}: Factored Latent {3D} Diffusion for Controllable {3D} Scene Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {628--639} }
HiFi-Portrait: Zero-shot Identity-preserved Portrait Generation with High-fidelity Multi-face Fusion: Yifang Xu,

Benxiang Zhai,

Yunzhuo Sun,

Ming Li,

Yang Li,

Sidan Du; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR, author = {Xu, Yifang and Zhai, Benxiang and Sun, Yunzhuo and Li, Ming and Li, Yang and Du, Sidan}, title = {{HiFi-Portrait}: Zero-shot Identity-preserved Portrait Generation with High-fidelity Multi-face Fusion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5625--5635} }
IAAO: Interactive Affordance Learning for Articulated Objects in 3D Environments: Can Zhang,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Zhang_2025_CVPR,
  author    = {Zhang, Can and Lee, Gim Hee},
  title     = {IAAO: Interactive Affordance Learning for Articulated Objects in 3D Environments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {12132-12142},
}
FloVD: Optical Flow Meets Video Diffusion Model for Enhanced Camera-Controlled Video Synthesis: Wonjoon Jin,

Qi Dai,

Chong Luo,

Seung-Hwan Baek,

Sunghyun Cho; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Jin_2025_CVPR,
  author    = {Jin, Wonjoon and Dai, Qi and Luo, Chong and Baek, Seung-Hwan and Cho, Sunghyun},
  title     = {FloVD: Optical Flow Meets Video Diffusion Model for Enhanced Camera-Controlled Video Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {2040-2049},
}
RAD: Region-Aware Diffusion Models for Image Inpainting: Sora Kim,

Sungho Suh,

Minsik Lee; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Kim_2025_CVPR,
  author    = {Kim, Sora and Suh, Sungho and Lee, Minsik},
  title     = {RAD: Region-Aware Diffusion Models for Image Inpainting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {2439-2448},
}
RaSS: Improving Denoising Diffusion Samplers with Reinforced Active Sampling Scheduler: Xin Ding,

Lei Yu,

Xin Li,

Zhijun Tu,

Hanting Chen,

Jie Hu,

Zhibo Chen; [pdf] [supp]
[bibtex]
@inproceedings{Ding_2025_CVPR,
  author    = {Ding, Xin and Yu, Lei and Li, Xin and Tu, Zhijun and Chen, Hanting and Hu, Jie and Chen, Zhibo},
  title     = {RaSS: Improving Denoising Diffusion Samplers with Reinforced Active Sampling Scheduler},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {12923-12933},
}
Supervising Sound Localization by In-the-wild Egomotion: Anna Min,

Ziyang Chen,

Hang Zhao,

Andrew Owens; [pdf] [supp]
[bibtex]
@inproceedings{Min_2025_CVPR,
  author    = {Min, Anna and Chen, Ziyang and Zhao, Hang and Owens, Andrew},
  title     = {Supervising Sound Localization by In-the-wild Egomotion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {23936-23946},
}
AutoLUT: LUT-Based Image Super-Resolution with Automatic Sampling and Adaptive Residual Learning: Yuheng Xu,

Shijie Yang,

Xin Liu,

Jie Liu,

Jie Tang,

Gangshan Wu; [pdf] [arXiv]
[bibtex]
@inproceedings{Xu_2025_CVPR,
  author    = {Xu, Yuheng and Yang, Shijie and Liu, Xin and Liu, Jie and Tang, Jie and Wu, Gangshan},
  title     = {AutoLUT: LUT-Based Image Super-Resolution with Automatic Sampling and Adaptive Residual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {23131-23140},
}
Understanding Fine-tuning CLIP for Open-vocabulary Semantic Segmentation in Hyperbolic Space: Zelin Peng,

Zhengqin Xu,

Zhilin Zeng,

Changsong Wen,

Yu Huang,

Menglin Yang,

Feilong Tang,

Wei Shen; [pdf] [supp]
[bibtex]
@inproceedings{Peng_2025_CVPR,
  author    = {Peng, Zelin and Xu, Zhengqin and Zeng, Zhilin and Wen, Changsong and Huang, Yu and Yang, Menglin and Tang, Feilong and Shen, Wei},
  title     = {Understanding Fine-tuning CLIP for Open-vocabulary Semantic Segmentation in Hyperbolic Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {4562-4572},
}
TexGaussian: Generating High-quality PBR Material via Octree-based 3D Gaussian Splatting: Bojun Xiong,

Jialun Liu,

Jiakui Hu,

Chenming Wu,

Jinbo Wu,

Xing Liu,

Chen Zhao,

Errui Ding,

Zhouhui Lian; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Xiong_2025_CVPR,
  author    = {Xiong, Bojun and Liu, Jialun and Hu, Jiakui and Wu, Chenming and Wu, Jinbo and Liu, Xing and Zhao, Chen and Ding, Errui and Lian, Zhouhui},
  title     = {TexGaussian: Generating High-quality PBR Material via Octree-based 3D Gaussian Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {551-561},
}
OSV: One Step is Enough for High-Quality Image to Video Generation: Xiaofeng Mao,

Zhengkai Jiang,

Fu-yun Wang,

Jiangning Zhang,

Hao Chen,

Mingmin Chi,

Yabiao Wang,

Wenhan Luo; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Mao_2025_CVPR,
  author    = {Mao, Xiaofeng and Jiang, Zhengkai and Wang, Fu-yun and Zhang, Jiangning and Chen, Hao and Chi, Mingmin and Wang, Yabiao and Luo, Wenhan},
  title     = {OSV: One Step is Enough for High-Quality Image to Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {12585-12594},
}
Fuzzy Multimodal Learning for Trusted Cross-modal Retrieval: Siyuan Duan,

Yuan Sun,

Dezhong Peng,

Zheng Liu,

Xiaomin Song,

Peng Hu; [pdf] [supp]
[bibtex]
@inproceedings{Duan_2025_CVPR,
  author    = {Duan, Siyuan and Sun, Yuan and Peng, Dezhong and Liu, Zheng and Song, Xiaomin and Hu, Peng},
  title     = {Fuzzy Multimodal Learning for Trusted Cross-modal Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {20747-20756},
}
AniGS: Animatable Gaussian Avatar from a Single Image with Inconsistent Gaussian Reconstruction: Lingteng Qiu,

Shenhao Zhu,

Qi Zuo,

Xiaodong Gu,

Yuan Dong,

Junfei Zhang,

Chao Xu,

Zhe Li,

Weihao Yuan,

Liefeng Bo,

Guanying Chen,

Zilong Dong; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Qiu_2025_CVPR,
  author    = {Qiu, Lingteng and Zhu, Shenhao and Zuo, Qi and Gu, Xiaodong and Dong, Yuan and Zhang, Junfei and Xu, Chao and Li, Zhe and Yuan, Weihao and Bo, Liefeng and Chen, Guanying and Dong, Zilong},
  title     = {AniGS: Animatable Gaussian Avatar from a Single Image with Inconsistent Gaussian Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {21148-21158},
}
GUI-Xplore: Empowering Generalizable GUI Agents with One Exploration: Yuchen Sun,

Shanhui Zhao,

Tao Yu,

Hao Wen,

Samith Va,

Mengwei Xu,

Yuanchun Li,

Chongyang Zhang; [pdf] [supp]
[bibtex]
@inproceedings{Sun_2025_CVPR,
  author    = {Sun, Yuchen and Zhao, Shanhui and Yu, Tao and Wen, Hao and Va, Samith and Xu, Mengwei and Li, Yuanchun and Zhang, Chongyang},
  title     = {GUI-Xplore: Empowering Generalizable GUI Agents with One Exploration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {19477-19486},
}
Few-Shot Recognition via Stage-Wise Retrieval-Augmented Finetuning: Tian Liu,

Huixin Zhang,

Shubham Parashar,

Shu Kong; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Liu_2025_CVPR,
  author    = {Liu, Tian and Zhang, Huixin and Parashar, Shubham and Kong, Shu},
  title     = {Few-Shot Recognition via Stage-Wise Retrieval-Augmented Finetuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {15086-15097},
}
Concept Replacer: Replacing Sensitive Concepts in Diffusion Models via Precision Localization: Lingyun Zhang,

Yu Xie,

Yanwei Fu,

Ping Chen; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Zhang_2025_CVPR,
  author    = {Zhang, Lingyun and Xie, Yu and Fu, Yanwei and Chen, Ping},
  title     = {Concept Replacer: Replacing Sensitive Concepts in Diffusion Models via Precision Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {8172-8181},
}
A Regularization-Guided Equivariant Approach for Image Restoration: Yulu Bai,

Jiahong Fu,

Qi Xie,

Deyu Meng; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Bai_2025_CVPR,
  author    = {Bai, Yulu and Fu, Jiahong and Xie, Qi and Meng, Deyu},
  title     = {A Regularization-Guided Equivariant Approach for Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {2300-2310},
}
RestorGS: Depth-aware Gaussian Splatting for Efficient 3D Scene Restoration: Yuanjian Qiao,

Mingwen Shao,

Lingzhuang Meng,

Kai Xu; [pdf] [supp]
[bibtex]
@inproceedings{Qiao_2025_CVPR,
  author    = {Qiao, Yuanjian and Shao, Mingwen and Meng, Lingzhuang and Xu, Kai},
  title     = {RestorGS: Depth-aware Gaussian Splatting for Efficient 3D Scene Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {11177-11186},
}
IM-Portrait: Learning 3D-aware Video Diffusion for Photorealistic Talking Heads from Monocular Videos: Yuan Li,

Ziqian Bai,

Feitong Tan,

Zhaopeng Cui,

Sean Fanello,

Yinda Zhang; [pdf] [supp]
[bibtex]
@inproceedings{Li_2025_CVPR,
  author    = {Li, Yuan and Bai, Ziqian and Tan, Feitong and Cui, Zhaopeng and Fanello, Sean and Zhang, Yinda},
  title     = {IM-Portrait: Learning 3D-aware Video Diffusion for Photorealistic Talking Heads from Monocular Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {21107-21116},
}
Deep Fair Multi-View Clustering with Attention KAN: HaiMing Xu,

Qianqian Wang,

Boyue Wang,

Quanxue Gao; [pdf]
[bibtex]
@inproceedings{Xu_2025_CVPR,
  author    = {Xu, HaiMing and Wang, Qianqian and Wang, Boyue and Gao, Quanxue},
  title     = {Deep Fair Multi-View Clustering with Attention KAN},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {5061-5070},
}
LineArt: A Knowledge-guided Training-free High-quality Appearance Transfer for Design Drawing with Diffusion Model: Xi Wang,

Hongzhen Li,

Heng Fang,

Yichen Peng,

Haoran Xie,

Xi Yang,

Chuntao Li; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Wang_2025_CVPR,
  author    = {Wang, Xi and Li, Hongzhen and Fang, Heng and Peng, Yichen and Xie, Haoran and Yang, Xi and Li, Chuntao},
  title     = {LineArt: A Knowledge-guided Training-free High-quality Appearance Transfer for Design Drawing with Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {2912-2923},
}
4Real-Video: Learning Generalizable Photo-Realistic 4D Video Diffusion: Chaoyang Wang,

Peiye Zhuang,

Tuan Duc Ngo,

Willi Menapace,

Aliaksandr Siarohin,

Michael Vasilkovsky,

Ivan Skorokhodov,

Sergey Tulyakov,

Peter Wonka,

Hsin-Ying Lee; [pdf] [supp]
[bibtex]
@inproceedings{Wang_2025_CVPR,
  author    = {Wang, Chaoyang and Zhuang, Peiye and Ngo, Tuan Duc and Menapace, Willi and Siarohin, Aliaksandr and Vasilkovsky, Michael and Skorokhodov, Ivan and Tulyakov, Sergey and Wonka, Peter and Lee, Hsin-Ying},
  title     = {4Real-Video: Learning Generalizable Photo-Realistic 4D Video Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {17723-17732},
}
DynaMoDe-NeRF: Motion-aware Deblurring Neural Radiance Field for Dynamic Scenes: Ashish Kumar,

Rajagopalan A. N.; [pdf] [supp]
[bibtex]
@inproceedings{Kumar_2025_CVPR,
  author    = {Kumar, Ashish and Rajagopalan, A. N.},
  title     = {DynaMoDe-NeRF: Motion-aware Deblurring Neural Radiance Field for Dynamic Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {21728-21738},
}
VideoICL: Confidence-based Iterative In-context Learning for Out-of-Distribution Video Understanding: Kangsan Kim,

Geon Park,

Youngwan Lee,

Woongyeong Yeo,

Sung Ju Hwang; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Kim_2025_CVPR,
  author    = {Kim, Kangsan and Park, Geon and Lee, Youngwan and Yeo, Woongyeong and Hwang, Sung Ju},
  title     = {VideoICL: Confidence-based Iterative In-context Learning for Out-of-Distribution Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {3295-3305},
}
Zero-Shot Image Restoration Using Few-Step Guidance of Consistency Models (and Beyond): Tomer Garber,

Tom Tirer; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Garber_2025_CVPR,
  author    = {Garber, Tomer and Tirer, Tom},
  title     = {Zero-Shot Image Restoration Using Few-Step Guidance of Consistency Models (and Beyond)},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {2398-2407},
}
Similarity-Guided Layer-Adaptive Vision Transformer for UAV Tracking: Chaocan Xue,

Bineng Zhong,

Qihua Liang,

Yaozong Zheng,

Ning Li,

Yuanliang Xue,

Shuxiang Song; [pdf] [arXiv]
[bibtex]
@inproceedings{Xue_2025_CVPR,
  author    = {Xue, Chaocan and Zhong, Bineng and Liang, Qihua and Zheng, Yaozong and Li, Ning and Xue, Yuanliang and Song, Shuxiang},
  title     = {Similarity-Guided Layer-Adaptive Vision Transformer for UAV Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {6730-6740},
}
LidarGait++: Learning Local Features and Size Awareness from LiDAR Point Clouds for 3D Gait Recognition: Chuanfu Shen,

Rui Wang,

Lixin Duan,

Shiqi Yu; [pdf] [supp]
[bibtex]
@inproceedings{Shen_2025_CVPR,
  author    = {Shen, Chuanfu and Wang, Rui and Duan, Lixin and Yu, Shiqi},
  title     = {LidarGait++: Learning Local Features and Size Awareness from LiDAR Point Clouds for 3D Gait Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {6627-6636},
}
UrbanCAD: Towards Highly Controllable and Photorealistic 3D Vehicles for Urban Scene Simulation: Yichong Lu,

Yichi Cai,

Shangzhan Zhang,

Hongyu Zhou,

Haoji Hu,

Huimin Yu,

Andreas Geiger,

Yiyi Liao; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Lu_2025_CVPR,
  author    = {Lu, Yichong and Cai, Yichi and Zhang, Shangzhan and Zhou, Hongyu and Hu, Haoji and Yu, Huimin and Geiger, Andreas and Liao, Yiyi},
  title     = {UrbanCAD: Towards Highly Controllable and Photorealistic 3D Vehicles for Urban Scene Simulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {27519-27530},
}
Coarse Correspondences Boost Spatial-Temporal Reasoning in Multimodal Language Model: Benlin Liu,

Yuhao Dong,

Yiqin Wang,

Zixian Ma,

Yansong Tang,

Luming Tang,

Yongming Rao,

Wei-Chiu Ma,

Ranjay Krishna; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Liu_2025_CVPR,
  author    = {Liu, Benlin and Dong, Yuhao and Wang, Yiqin and Ma, Zixian and Tang, Yansong and Tang, Luming and Rao, Yongming and Ma, Wei-Chiu and Krishna, Ranjay},
  title     = {Coarse Correspondences Boost Spatial-Temporal Reasoning in Multimodal Language Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {3783-3792},
}
Diff-Palm: Realistic Palmprint Generation with Polynomial Creases and Intra-Class Variation Controllable Diffusion Models: Jianlong Jin,

Chenglong Zhao,

Ruixin Zhang,

Sheng Shang,

Jianqing Xu,

Jingyun Zhang,

ShaoMing Wang,

Yang Zhao,

Shouhong Ding,

Wei Jia,

Yunsheng Wu; [pdf] [supp]
[bibtex]
@inproceedings{Jin_2025_CVPR,
  author    = {Jin, Jianlong and Zhao, Chenglong and Zhang, Ruixin and Shang, Sheng and Xu, Jianqing and Zhang, Jingyun and Wang, ShaoMing and Zhao, Yang and Ding, Shouhong and Jia, Wei and Wu, Yunsheng},
  title     = {Diff-Palm: Realistic Palmprint Generation with Polynomial Creases and Intra-Class Variation Controllable Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {26367-26376},
}
FoundationStereo: Zero-Shot Stereo Matching: Bowen Wen,

Matthew Trepte,

Joseph Aribido,

Jan Kautz,

Orazio Gallo,

Stan Birchfield; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Wen_2025_CVPR,
  author    = {Wen, Bowen and Trepte, Matthew and Aribido, Joseph and Kautz, Jan and Gallo, Orazio and Birchfield, Stan},
  title     = {FoundationStereo: Zero-Shot Stereo Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {5249-5260},
}
Z-Magic: Zero-shot Multiple Attributes Guided Image Creator: Yingying Deng,

Xiangyu He,

Fan Tang,

Weiming Dong; [pdf] [supp]
[bibtex]
@inproceedings{Deng_2025_CVPR,
  author    = {Deng, Yingying and He, Xiangyu and Tang, Fan and Dong, Weiming},
  title     = {Z-Magic: Zero-shot Multiple Attributes Guided Image Creator},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {18390-18400},
}
UniNet: A Contrastive Learning-guided Unified Framework with Feature Selection for Anomaly Detection: Shun Wei,

Jielin Jiang,

Xiaolong Xu; [pdf] [supp]
[bibtex]
@inproceedings{Wei_2025_CVPR,
  author    = {Wei, Shun and Jiang, Jielin and Xu, Xiaolong},
  title     = {UniNet: A Contrastive Learning-guided Unified Framework with Feature Selection for Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {9994-10003},
}
Chain of Semantics Programming in 3D Gaussian Splatting Representation for 3D Vision Grounding: Jiaxin Shi,

Mingyue Xiang,

Hao Sun,

Yixuan Huang,

Zhi Weng; [pdf] [supp]
[bibtex]
@inproceedings{Shi_2025_CVPR,
  author    = {Shi, Jiaxin and Xiang, Mingyue and Sun, Hao and Huang, Yixuan and Weng, Zhi},
  title     = {Chain of Semantics Programming in 3D Gaussian Splatting Representation for 3D Vision Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {24560-24569},
}
On the Zero-shot Adversarial Robustness of Vision-Language Models: A Truly Zero-shot and Training-free Approach: Baoshun Tong,

Hanjiang Lai,

Yan Pan,

Jian Yin; [pdf]
[bibtex]
@inproceedings{Tong_2025_CVPR,
  author    = {Tong, Baoshun and Lai, Hanjiang and Pan, Yan and Yin, Jian},
  title     = {On the Zero-shot Adversarial Robustness of Vision-Language Models: A Truly Zero-shot and Training-free Approach},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {19921-19930},
}
Towards General Visual-Linguistic Face Forgery Detection: Ke Sun,

Shen Chen,

Taiping Yao,

Ziyin Zhou,

Jiayi Ji,

Xiaoshuai Sun,

Chia-Wen Lin,

Rongrong Ji; [pdf] [supp]
[bibtex]
@inproceedings{Sun_2025_CVPR,
  author    = {Sun, Ke and Chen, Shen and Yao, Taiping and Zhou, Ziyin and Ji, Jiayi and Sun, Xiaoshuai and Lin, Chia-Wen and Ji, Rongrong},
  title     = {Towards General Visual-Linguistic Face Forgery Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {19576-19586},
}
Movie Weaver: Tuning-Free Multi-Concept Video Personalization with Anchored Prompts: Feng Liang,

Haoyu Ma,

Zecheng He,

Tingbo Hou,

Ji Hou,

Kunpeng Li,

Xiaoliang Dai,

Felix Juefei-Xu,

Samaneh Azadi,

Animesh Sinha,

Peizhao Zhang,

Peter Vajda,

Diana Marculescu; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Liang_2025_CVPR,
  author    = {Liang, Feng and Ma, Haoyu and He, Zecheng and Hou, Tingbo and Hou, Ji and Li, Kunpeng and Dai, Xiaoliang and Juefei-Xu, Felix and Azadi, Samaneh and Sinha, Animesh and Zhang, Peizhao and Vajda, Peter and Marculescu, Diana},
  title     = {Movie Weaver: Tuning-Free Multi-Concept Video Personalization with Anchored Prompts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {13146-13156},
}
LongVALE: Vision-Audio-Language-Event Benchmark Towards Time-Aware Omni-Modal Perception of Long Videos: Tiantian Geng,

Jinrui Zhang,

Qingni Wang,

Teng Wang,

Jinming Duan,

Feng Zheng; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Geng_2025_CVPR,
  author    = {Geng, Tiantian and Zhang, Jinrui and Wang, Qingni and Wang, Teng and Duan, Jinming and Zheng, Feng},
  title     = {LongVALE: Vision-Audio-Language-Event Benchmark Towards Time-Aware Omni-Modal Perception of Long Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {18959-18969},
}
MVPortrait: Text-Guided Motion and Emotion Control for Multi-view Vivid Portrait Animation: Yukang Lin,

Hokit Fung,

Jianjin Xu,

Zeping Ren,

Adela S.M. Lau,

Guosheng Yin,

Xiu Li; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Lin_2025_CVPR,
  author    = {Lin, Yukang and Fung, Hokit and Xu, Jianjin and Ren, Zeping and Lau, Adela S. M. and Yin, Guosheng and Li, Xiu},
  title     = {MVPortrait: Text-Guided Motion and Emotion Control for Multi-view Vivid Portrait Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {26242-26252},
}
MoEdit: On Learning Quantity Perception for Multi-object Image Editing: Yanfeng Li,

Kahou Chan,

Yue Sun,

Chantong Lam,

Tong Tong,

Zitong Yu,

Keren Fu,

Xiaohong Liu,

Tao Tan; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Li_2025_CVPR,
  author    = {Li, Yanfeng and Chan, Kahou and Sun, Yue and Lam, Chantong and Tong, Tong and Yu, Zitong and Fu, Keren and Liu, Xiaohong and Tan, Tao},
  title     = {MoEdit: On Learning Quantity Perception for Multi-object Image Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {2683-2693},
}
Seeing More with Less: Human-like Representations in Vision Models: Andrey Gizdov,

Shimon Ullman,

Daniel Harari; [pdf] [supp]
[bibtex]
@inproceedings{Gizdov_2025_CVPR,
  author    = {Gizdov, Andrey and Ullman, Shimon and Harari, Daniel},
  title     = {Seeing More with Less: Human-like Representations in Vision Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {4408-4417},
}
Modeling Thousands of Human Annotators for Generalizable Text-to-Image Person Re-identification: Jiayu Jiang,

Changxing Ding,

Wentao Tan,

Junhong Wang,

Jin Tao,

Xiangmin Xu; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Jiang_2025_CVPR,
  author    = {Jiang, Jiayu and Ding, Changxing and Tan, Wentao and Wang, Junhong and Tao, Jin and Xu, Xiangmin},
  title     = {Modeling Thousands of Human Annotators for Generalizable Text-to-Image Person Re-identification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {9220-9230},
}
Accelerating Multimodal Large Language Models by Searching Optimal Vision Token Reduction: Shiyu Zhao,

Zhenting Wang,

Felix Juefei-Xu,

Xide Xia,

Miao Liu,

Xiaofang Wang,

Mingfu Liang,

Ning Zhang,

Dimitris N. Metaxas,

Licheng Yu; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Zhao_2025_CVPR,
  author    = {Zhao, Shiyu and Wang, Zhenting and Juefei-Xu, Felix and Xia, Xide and Liu, Miao and Wang, Xiaofang and Liang, Mingfu and Zhang, Ning and Metaxas, Dimitris N. and Yu, Licheng},
  title     = {Accelerating Multimodal Large Language Models by Searching Optimal Vision Token Reduction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {29869-29879},
}
Matrix-Free Shared Intrinsics Bundle Adjustment: Daniel Safari; [pdf]
[bibtex]
@inproceedings{Safari_2025_CVPR,
  author    = {Safari, Daniel},
  title     = {Matrix-Free Shared Intrinsics Bundle Adjustment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {27017-27026},
}
AeroGen: Enhancing Remote Sensing Object Detection with Diffusion-Driven Data Generation: Datao Tang,

Xiangyong Cao,

Xuan Wu,

Jialin Li,

Jing Yao,

Xueru Bai,

Dongsheng Jiang,

Yin Li,

Deyu Meng; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Tang_2025_CVPR,
  author    = {Tang, Datao and Cao, Xiangyong and Wu, Xuan and Li, Jialin and Yao, Jing and Bai, Xueru and Jiang, Dongsheng and Li, Yin and Meng, Deyu},
  title     = {AeroGen: Enhancing Remote Sensing Object Detection with Diffusion-Driven Data Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {3614-3624},
}
Tra-MoE: Learning Trajectory Prediction Model from Multiple Domains for Adaptive Policy Conditioning: Jiange Yang,

Haoyi Zhu,

Yating Wang,

Gangshan Wu,

Tong He,

Limin Wang; [pdf] [supp]
[bibtex]
@inproceedings{Yang_2025_CVPR,
  author    = {Yang, Jiange and Zhu, Haoyi and Wang, Yating and Wu, Gangshan and He, Tong and Wang, Limin},
  title     = {Tra-MoE: Learning Trajectory Prediction Model from Multiple Domains for Adaptive Policy Conditioning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {6960-6970},
}
Mitigating Hallucinations in Large Vision-Language Models via DPO: On-Policy Data Hold the Key: Zhihe Yang,

Xufang Luo,

Dongqi Han,

Yunjian Xu,

Dongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Yang_2025_CVPR,
  author    = {Yang, Zhihe and Luo, Xufang and Han, Dongqi and Xu, Yunjian and Li, Dongsheng},
  title     = {Mitigating Hallucinations in Large Vision-Language Models via DPO: On-Policy Data Hold the Key},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {10610-10620},
}
Style Quantization for Data-Efficient GAN Training: Jian Wang,

Xin Lan,

Jizhe Zhou,

Yuxin Tian,

Jiancheng Lv; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Wang_2025_CVPR,
  author    = {Wang, Jian and Lan, Xin and Zhou, Jizhe and Tian, Yuxin and Lv, Jiancheng},
  title     = {Style Quantization for Data-Efficient GAN Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {7696-7706},
}
Localizing Events in Videos with Multimodal Queries: Gengyuan Zhang,

Mang Ling Ada Fok,

Jialu Ma,

Yan Xia,

Daniel Cremers,

Philip Torr,

Volker Tresp,

Jindong Gu; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Zhang_2025_CVPR,
  author    = {Zhang, Gengyuan and Fok, Mang Ling Ada and Ma, Jialu and Xia, Yan and Cremers, Daniel and Torr, Philip and Tresp, Volker and Gu, Jindong},
  title     = {Localizing Events in Videos with Multimodal Queries},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {3339-3351},
}
PhysVLM: Enabling Visual Language Models to Understand Robotic Physical Reachability: Weijie Zhou,

Manli Tao,

Chaoyang Zhao,

Haiyun Guo,

Honghui Dong,

Ming Tang,

Jinqiao Wang; [pdf] [arXiv]
[bibtex]
@inproceedings{Zhou_2025_CVPR,
  author    = {Zhou, Weijie and Tao, Manli and Zhao, Chaoyang and Guo, Haiyun and Dong, Honghui and Tang, Ming and Wang, Jinqiao},
  title     = {PhysVLM: Enabling Visual Language Models to Understand Robotic Physical Reachability},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {6940-6949},
}
CleanDIFT: Diffusion Features without Noise: Nick Stracke,

Stefan Andreas Baumann,

Kolja Bauer,

Frank Fundel,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Stracke_2025_CVPR,
  author    = {Stracke, Nick and Baumann, Stefan Andreas and Bauer, Kolja and Fundel, Frank and Ommer, Bj{\"o}rn},
  title     = {CleanDIFT: Diffusion Features without Noise},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {117-127},
}
Simpler Diffusion: 1.5 FID on ImageNet512 with Pixel-space Diffusion: Emiel Hoogeboom,

Thomas Mensink,

Jonathan Heek,

Kay Lamerigts,

Ruiqi Gao,

Tim Salimans; [pdf] [supp]
[bibtex]
@inproceedings{Hoogeboom_2025_CVPR,
  author    = {Hoogeboom, Emiel and Mensink, Thomas and Heek, Jonathan and Lamerigts, Kay and Gao, Ruiqi and Salimans, Tim},
  title     = {Simpler Diffusion: 1.5 FID on ImageNet512 with Pixel-space Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {18062-18071},
}
Uncertainty-Instructed Structure Injection for Generalizable HD Map Construction: Xiaolu Liu,

Ruizi Yang,

Song Wang,

Wentong Li,

Junbo Chen,

Jianke Zhu; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Liu_2025_CVPR,
  author    = {Liu, Xiaolu and Yang, Ruizi and Wang, Song and Li, Wentong and Chen, Junbo and Zhu, Jianke},
  title     = {Uncertainty-Instructed Structure Injection for Generalizable HD Map Construction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {22359-22368},
}
STING-BEE: Towards Vision-Language Model for Real-World X-ray Baggage Security Inspection: Divya Velayudhan,

Abdelfatah Ahmed,

Mohamad Alansari,

Neha Gour,

Abderaouf Behouch,

Taimur Hassan,

Syed Talal Wasim,

Nabil Maalej,

Muzammal Naseer,

Juergen Gall,

Mohammed Bennamoun,

Ernesto Damiani,

Naoufel Werghi; [pdf] [supp]
[bibtex]
@inproceedings{Velayudhan_2025_CVPR,
  author    = {Velayudhan, Divya and Ahmed, Abdelfatah and Alansari, Mohamad and Gour, Neha and Behouch, Abderaouf and Hassan, Taimur and Wasim, Syed Talal and Maalej, Nabil and Naseer, Muzammal and Gall, Juergen and Bennamoun, Mohammed and Damiani, Ernesto and Werghi, Naoufel},
  title     = {STING-BEE: Towards Vision-Language Model for Real-World X-ray Baggage Security Inspection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {20767-20777},
}
Not All Parameters Matter: Masking Diffusion Models for Enhancing Generation Ability: Lei Wang,

Senmao Li,

Fei Yang,

Jianye Wang,

Ziheng Zhang,

Yuhan Liu,

Yaxing Wang,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Wang_2025_CVPR,
  author    = {Wang, Lei and Li, Senmao and Yang, Fei and Wang, Jianye and Zhang, Ziheng and Liu, Yuhan and Wang, Yaxing and Yang, Jian},
  title     = {Not All Parameters Matter: Masking Diffusion Models for Enhancing Generation Ability},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {12880-12890},
}
MAD: Memory-Augmented Detection of 3D Objects: Ben Agro,

Sergio Casas,

Patrick Wang,

Thomas Gilles,

Raquel Urtasun; [pdf] [supp]
[bibtex]
@inproceedings{Agro_2025_CVPR,
  author    = {Agro, Ben and Casas, Sergio and Wang, Patrick and Gilles, Thomas and Urtasun, Raquel},
  title     = {MAD: Memory-Augmented Detection of 3D Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {1449-1460},
}
Doppelgangers and Adversarial Vulnerability: George Kamberov; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Kamberov_2025_CVPR,
  author    = {Kamberov, George},
  title     = {Doppelgangers and Adversarial Vulnerability},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {10244-10254},
}
Complexity Experts are Task-Discriminative Learners for Any Image Restoration: Eduard Zamfir,

Zongwei Wu,

Nancy Mehta,

Yuedong Tan,

Danda Pani Paudel,

Yulun Zhang,

Radu Timofte; [pdf] [supp] [arXiv]
[bibtex]
@inproceedings{Zamfir_2025_CVPR,
  author    = {Zamfir, Eduard and Wu, Zongwei and Mehta, Nancy and Tan, Yuedong and Paudel, Danda Pani and Zhang, Yulun and Timofte, Radu},
  title     = {Complexity Experts are Task-Discriminative Learners for Any Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = {June},
  year      = {2025},
  pages     = {12753-12763},
}
Generative Omnimatte: Learning to Decompose Video into Layers: Yao-Chih Lee,

Erika Lu,

Sarah Rumbley,

Michal Geyer,

Jia-Bin Huang,

Tali Dekel,

Forrester Cole; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Yao-Chih and Lu, Erika and Rumbley, Sarah and Geyer, Michal and Huang, Jia-Bin and Dekel, Tali and Cole, Forrester},
  title     = {Generative Omnimatte: Learning to Decompose Video into Layers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12522--12532},
}
5%>100%: Breaking Performance Shackles of Full Fine-Tuning on Visual Recognition Tasks: Dongshuo Yin,

Leiyi Hu,

Bin Li,

Youqun Zhang,

Xue Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Dongshuo and Hu, Leiyi and Li, Bin and Zhang, Youqun and Yang, Xue},
  title     = {5\%\ensuremath{>}100\%: Breaking Performance Shackles of Full Fine-Tuning on Visual Recognition Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20071--20081},
}
Precise Event Spotting in Sports Videos: Solving Long-Range Dependency and Class Imbalance: Sanchayan Santra,

Vishal Chudasama,

Pankaj Wasnik,

Vineeth N Balasubramanian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Santra_2025_CVPR,
  author    = {Santra, Sanchayan and Chudasama, Vishal and Wasnik, Pankaj and Balasubramanian, Vineeth N},
  title     = {Precise Event Spotting in Sports Videos: Solving Long-Range Dependency and Class Imbalance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3163--3172},
}
Real-IAD D3: A Real-World 2D/Pseudo-3D/3D Dataset for Industrial Anomaly Detection: Wenbing Zhu,

Lidong Wang,

Ziqing Zhou,

Chengjie Wang,

Yurui Pan,

Ruoyi Zhang,

Zhuhao Chen,

Linjie Cheng,

Bin-Bin Gao,

Jiangning Zhang,

Zhenye Gan,

Yuxie Wang,

Yulong Chen,

Shuguang Qian,

Mingmin Chi,

Bo Peng,

Lizhuang Ma; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Wenbing and Wang, Lidong and Zhou, Ziqing and Wang, Chengjie and Pan, Yurui and Zhang, Ruoyi and Chen, Zhuhao and Cheng, Linjie and Gao, Bin-Bin and Zhang, Jiangning and Gan, Zhenye and Wang, Yuxie and Chen, Yulong and Qian, Shuguang and Chi, Mingmin and Peng, Bo and Ma, Lizhuang},
  title     = {{Real-IAD} {D3}: A Real-World {2D}/Pseudo-{3D}/{3D} Dataset for Industrial Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15214--15223},
}
Steady Progress Beats Stagnation: Mutual Aid of Foundation and Conventional Models in Mixed Domain Semi-Supervised Medical Image Segmentation: Qinghe Ma,

Jian Zhang,

Zekun Li,

Lei Qi,

Qian Yu,

Yinghuan Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Qinghe and Zhang, Jian and Li, Zekun and Qi, Lei and Yu, Qian and Shi, Yinghuan},
  title     = {Steady Progress Beats Stagnation: Mutual Aid of Foundation and Conventional Models in Mixed Domain Semi-Supervised Medical Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5175--5185},
}
ATP: Adaptive Threshold Pruning for Efficient Data Encoding in Quantum Neural Networks: Mohamed Afane,

Gabrielle Ebbrecht,

Ying Wang,

Juntao Chen,

Junaid Farooq; [pdf] [arXiv]
[bibtex]
@InProceedings{Afane_2025_CVPR,
  author    = {Afane, Mohamed and Ebbrecht, Gabrielle and Wang, Ying and Chen, Juntao and Farooq, Junaid},
  title     = {{ATP}: Adaptive Threshold Pruning for Efficient Data Encoding in Quantum Neural Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20427--20436},
}
Color Alignment in Diffusion: Ka Chun Shum,

Binh-Son Hua,

Duc Thanh Nguyen,

Sai-Kit Yeung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shum_2025_CVPR,
  author    = {Shum, Ka Chun and Hua, Binh-Son and Nguyen, Duc Thanh and Yeung, Sai-Kit},
  title     = {Color Alignment in Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28446--28455},
}
LLAVIDAL: A Large LAnguage VIsion Model for Daily Activities of Living: Dominick Reilly,

Rajatsubhra Chakraborty,

Arkaprava Sinha,

Manish Kumar Govind,

Pu Wang,

Francois Bremond,

Le Xue,

Srijan Das; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Reilly_2025_CVPR,
  author    = {Reilly, Dominick and Chakraborty, Rajatsubhra and Sinha, Arkaprava and Govind, Manish Kumar and Wang, Pu and Bremond, Francois and Xue, Le and Das, Srijan},
  title     = {{LLAVIDAL}: A Large {LAnguage} {VIsion} Model for Daily Activities of Living},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24297--24308},
}
Language-Guided Salient Object Ranking: Fang Liu,

Yuhao Liu,

Ke Xu,

Shuquan Ye,

Gerhard Petrus Hancke,

Rynson W. H. Lau; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Fang and Liu, Yuhao and Xu, Ke and Ye, Shuquan and Hancke, Gerhard Petrus and Lau, Rynson W. H.},
  title     = {Language-Guided Salient Object Ranking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29803--29813},
}
Decoupled Motion Expression Video Segmentation: Hao Fang,

Runmin Cong,

Xiankai Lu,

Xiaofei Zhou,

Sam Kwong,

Wei Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Hao and Cong, Runmin and Lu, Xiankai and Zhou, Xiaofei and Kwong, Sam and Zhang, Wei},
  title     = {Decoupled Motion Expression Video Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13821--13831},
}
K-LoRA: Unlocking Training-Free Fusion of Any Subject and Style LoRAs: Ziheng Ouyang,

Zhen Li,

Qibin Hou; [pdf] [supp]
[bibtex]
@InProceedings{Ouyang_2025_CVPR,
  author    = {Ouyang, Ziheng and Li, Zhen and Hou, Qibin},
  title     = {{K-LoRA}: Unlocking Training-Free Fusion of Any Subject and Style {LoRAs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13041--13050},
}
Towards More General Video-based Deepfake Detection through Facial Component Guided Adaptation for Foundation Model: Yue-Hua Han,

Tai-Ming Huang,

Kai-Lung Hua,

Jun-Cheng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Yue-Hua and Huang, Tai-Ming and Hua, Kai-Lung and Chen, Jun-Cheng},
  title     = {Towards More General Video-based Deepfake Detection through Facial Component Guided Adaptation for Foundation Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22995--23005},
}
WF-VAE: Enhancing Video VAE by Wavelet-Driven Energy Flow for Latent Video Diffusion Model: Zongjian Li,

Bin Lin,

Yang Ye,

Liuhan Chen,

Xinhua Cheng,

Shenghai Yuan,

Li Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zongjian and Lin, Bin and Ye, Yang and Chen, Liuhan and Cheng, Xinhua and Yuan, Shenghai and Yuan, Li},
  title     = {{WF-VAE}: Enhancing Video {VAE} by Wavelet-Driven Energy Flow for Latent Video Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17778--17788},
}
SP3D: Boosting Sparsely-Supervised 3D Object Detection via Accurate Cross-Modal Semantic Prompts: Shijia Zhao,

Qiming Xia,

Xusheng Guo,

Pufan Zou,

Maoji Zheng,

Hai Wu,

Chenglu Wen,

Cheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Shijia and Xia, Qiming and Guo, Xusheng and Zou, Pufan and Zheng, Maoji and Wu, Hai and Wen, Chenglu and Wang, Cheng},
  title     = {{SP3D}: Boosting Sparsely-Supervised {3D} Object Detection via Accurate Cross-Modal Semantic Prompts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29374--29384},
}
ARKit LabelMaker: A New Scale for Indoor 3D Scene Understanding: Guangda Ji,

Silvan Weder,

Francis Engelmann,

Marc Pollefeys,

Hermann Blum; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2025_CVPR,
  author    = {Ji, Guangda and Weder, Silvan and Engelmann, Francis and Pollefeys, Marc and Blum, Hermann},
  title     = {{ARKit} {LabelMaker}: A New Scale for Indoor {3D} Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4398--4407},
}
VoCo-LLaMA: Towards Vision Compression with Large Language Models: Xubing Ye,

Yukang Gan,

Xiaoke Huang,

Yixiao Ge,

Yansong Tang; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Xubing and Gan, Yukang and Huang, Xiaoke and Ge, Yixiao and Tang, Yansong},
  title     = {{VoCo-LLaMA}: Towards Vision Compression with Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29836--29846},
}
StreamingT2V: Consistent, Dynamic, and Extendable Long Video Generation from Text: Roberto Henschel,

Levon Khachatryan,

Hayk Poghosyan,

Daniil Hayrapetyan,

Vahram Tadevosyan,

Zhangyang Wang,

Shant Navasardyan,

Humphrey Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Henschel_2025_CVPR,
  author    = {Henschel, Roberto and Khachatryan, Levon and Poghosyan, Hayk and Hayrapetyan, Daniil and Tadevosyan, Vahram and Wang, Zhangyang and Navasardyan, Shant and Shi, Humphrey},
  title     = {{StreamingT2V}: Consistent, Dynamic, and Extendable Long Video Generation from Text},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2568--2577},
}
Focal Split: Untethered Snapshot Depth from Differential Defocus: Junjie Luo,

John Mamish,

Alan Fu,

Thomas Concannon,

Josiah Hester,

Emma Alexander,

Qi Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Junjie and Mamish, John and Fu, Alan and Concannon, Thomas and Hester, Josiah and Alexander, Emma and Guo, Qi},
  title     = {{Focal Split}: Untethered Snapshot Depth from Differential Defocus},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26965--26974},
}
AFL: A Single-Round Analytic Approach for Federated Learning with Pre-trained Models: Run He,

Kai Tong,

Di Fang,

Han Sun,

Ziqian Zeng,

Haoran Li,

Tianyi Chen,

Huiping Zhuang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Run and Tong, Kai and Fang, Di and Sun, Han and Zeng, Ziqian and Li, Haoran and Chen, Tianyi and Zhuang, Huiping},
  title     = {{AFL}: A Single-Round Analytic Approach for Federated Learning with Pre-trained Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4988--4998},
}
XLRS-Bench: Could Your Multimodal LLMs Understand Extremely Large Ultra-High-Resolution Remote Sensing Imagery?: Fengxiang Wang,

Hongzhen Wang,

Zonghao Guo,

Di Wang,

Yulin Wang,

Mingshuo Chen,

Qiang Ma,

Long Lan,

Wenjing Yang,

Jing Zhang,

Zhiyuan Liu,

Maosong Sun; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Fengxiang and Wang, Hongzhen and Guo, Zonghao and Wang, Di and Wang, Yulin and Chen, Mingshuo and Ma, Qiang and Lan, Long and Yang, Wenjing and Zhang, Jing and Liu, Zhiyuan and Sun, Maosong},
  title     = {{XLRS-Bench}: Could Your Multimodal {LLMs} Understand Extremely Large Ultra-High-Resolution Remote Sensing Imagery?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14325--14336},
}
BOLT: Boost Large Vision-Language Model Without Training for Long-form Video Understanding: Shuming Liu,

Chen Zhao,

Tianqi Xu,

Bernard Ghanem; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Shuming and Zhao, Chen and Xu, Tianqi and Ghanem, Bernard},
  title     = {{BOLT}: Boost Large Vision-Language Model Without Training for Long-form Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3318--3327},
}
Efficient Data Driven Mixture-of-Expert Extraction from Trained Networks: Uranik Berisha,

Jens Mehnert,

Alexandru Paul Condurache; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Berisha_2025_CVPR,
  author    = {Berisha, Uranik and Mehnert, Jens and Condurache, Alexandru Paul},
  title     = {Efficient Data Driven Mixture-of-Expert Extraction from Trained Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20082--20091},
}
Reference-Based 3D-Aware Image Editing with Triplanes: Bahri Batuhan Bilecen,

Yigit Yalin,

Ning Yu,

Aysegul Dundar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bilecen_2025_CVPR,
  author    = {Bilecen, Bahri Batuhan and Yalin, Yigit and Yu, Ning and Dundar, Aysegul},
  title     = {Reference-Based {3D}-Aware Image Editing with Triplanes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5904--5915},
}
StyleSSP: Sampling StartPoint Enhancement for Training-free Diffusion-based Method for Style Transfer: Ruojun Xu,

Weijie Xi,

XiaoDi Wang,

Yongbo Mao,

Zach Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Ruojun and Xi, Weijie and Wang, XiaoDi and Mao, Yongbo and Cheng, Zach},
  title     = {{StyleSSP}: Sampling {StartPoint} Enhancement for Training-free Diffusion-based Method for Style Transfer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18260--18269},
}
PURA: Parameter Update-Recovery Test-Time Adaption for RGB-T Tracking: Zekai Shao,

Yufan Hu,

Bin Fan,

Hongmin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Shao_2025_CVPR,
  author    = {Shao, Zekai and Hu, Yufan and Fan, Bin and Liu, Hongmin},
  title     = {{PURA}: Parameter Update-Recovery Test-Time Adaption for {RGB-T} Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22089--22098},
}
One is Plenty: A Polymorphic Feature Interpreter for Immutable Heterogeneous Collaborative Perception: Yuchen Xia,

Quan Yuan,

Guiyang Luo,

Xiaoyuan Fu,

Yang Li,

Xuanhan Zhu,

Tianyou Luo,

Siheng Chen,

Jinglin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_CVPR,
  author    = {Xia, Yuchen and Yuan, Quan and Luo, Guiyang and Fu, Xiaoyuan and Li, Yang and Zhu, Xuanhan and Luo, Tianyou and Chen, Siheng and Li, Jinglin},
  title     = {One is Plenty: A Polymorphic Feature Interpreter for Immutable Heterogeneous Collaborative Perception},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1592--1601},
}
Towards All-in-One Medical Image Re-Identification: Yuan Tian,

Kaiyuan Ji,

Rongzhao Zhang,

Yankai Jiang,

Chunyi Li,

Xiaosong Wang,

Guangtao Zhai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Yuan and Ji, Kaiyuan and Zhang, Rongzhao and Jiang, Yankai and Li, Chunyi and Wang, Xiaosong and Zhai, Guangtao},
  title     = {Towards All-in-One Medical Image Re-Identification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30774--30786},
}
SegAgent: Exploring Pixel Understanding Capabilities in MLLMs by Imitating Human Annotator Trajectories: Muzhi Zhu,

Yuzhuo Tian,

Hao Chen,

Chunluan Zhou,

Qingpei Guo,

Yang Liu,

Ming Yang,

Chunhua Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Muzhi and Tian, Yuzhuo and Chen, Hao and Zhou, Chunluan and Guo, Qingpei and Liu, Yang and Yang, Ming and Shen, Chunhua},
  title     = {{SegAgent}: Exploring Pixel Understanding Capabilities in {MLLMs} by Imitating Human Annotator Trajectories},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3686--3696},
}
Motions as Queries: One-Stage Multi-Person Holistic Human Motion Capture: Kenkun Liu,

Yurong Fu,

Weihao Yuan,

Jing Lin,

Peihao Li,

Xiaodong Gu,

Lingteng Qiu,

Haoqian Wang,

Zilong Dong,

Xiaoguang Han; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Kenkun and Fu, Yurong and Yuan, Weihao and Lin, Jing and Li, Peihao and Gu, Xiaodong and Qiu, Lingteng and Wang, Haoqian and Dong, Zilong and Han, Xiaoguang},
  title     = {Motions as Queries: One-Stage Multi-Person Holistic Human Motion Capture},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17529--17539},
}
SceneCrafter: Controllable Multi-View Driving Scene Editing: Zehao Zhu,

Yuliang Zou,

Chiyu Max Jiang,

Bo Sun,

Vincent Casser,

Xiukun Huang,

Jiahao Wang,

Zhenpei Yang,

Ruiqi Gao,

Leonidas Guibas,

Mingxing Tan,

Dragomir Anguelov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Zehao and Zou, Yuliang and Jiang, Chiyu Max and Sun, Bo and Casser, Vincent and Huang, Xiukun and Wang, Jiahao and Yang, Zhenpei and Gao, Ruiqi and Guibas, Leonidas and Tan, Mingxing and Anguelov, Dragomir},
  title     = {{SceneCrafter}: Controllable Multi-View Driving Scene Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6812--6822},
}
AMO Sampler: Enhancing Text Rendering with Overshooting: Xixi Hu,

Keyang Xu,

Bo Liu,

Qiang Liu,

Hongliang Fei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Xixi and Xu, Keyang and Liu, Bo and Liu, Qiang and Fei, Hongliang},
  title     = {{AMO} Sampler: Enhancing Text Rendering with Overshooting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13157--13166},
}
ImViD: Immersive Volumetric Videos for Enhanced VR Engagement: Zhengxian Yang,

Shi Pan,

Shengqi Wang,

Haoxiang Wang,

Li Lin,

Guanjun Li,

Zhengqi Wen,

Borong Lin,

Jianhua Tao,

Tao Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Zhengxian and Pan, Shi and Wang, Shengqi and Wang, Haoxiang and Lin, Li and Li, Guanjun and Wen, Zhengqi and Lin, Borong and Tao, Jianhua and Yu, Tao},
  title     = {{ImViD}: Immersive Volumetric Videos for Enhanced {VR} Engagement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16554--16564},
}
Integral Fast Fourier Color Constancy: Wenjun Wei,

Yanlin Qian,

Huaian Chen,

Junkang Dai,

Yi Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Wenjun and Qian, Yanlin and Chen, Huaian and Dai, Junkang and Jin, Yi},
  title     = {Integral Fast {Fourier} Color Constancy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26420--26429},
}
I2VGuard: Safeguarding Images against Misuse in Diffusion-based Image-to-Video Models: Dongnan Gui,

Xun Guo,

Wengang Zhou,

Yan Lu; [pdf] [supp]
[bibtex]
@InProceedings{Gui_2025_CVPR,
  author    = {Gui, Dongnan and Guo, Xun and Zhou, Wengang and Lu, Yan},
  title     = {{I2VGuard}: Safeguarding Images against Misuse in Diffusion-based Image-to-Video Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12595--12604},
}
Saliuitl: Ensemble Salience Guided Recovery of Adversarial Patches against CNNs: Mauricio Byrd Victorica,

György Dán,

Henrik Sandberg; [pdf] [supp]
[bibtex]
@InProceedings{Victorica_2025_CVPR,
  author    = {Victorica, Mauricio Byrd and D{\'a}n, Gy{\"o}rgy and Sandberg, Henrik},
  title     = {Saliuitl: Ensemble Salience Guided Recovery of Adversarial Patches against {CNNs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20360--20369},
}
HeatFormer: A Neural Optimizer for Multiview Human Mesh Recovery: Yuto Matsubara,

Ko Nishino; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Matsubara_2025_CVPR,
  author    = {Matsubara, Yuto and Nishino, Ko},
  title     = {{HeatFormer}: A Neural Optimizer for Multiview Human Mesh Recovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6415--6424},
}
ResCLIP: Residual Attention for Training-free Dense Vision-language Inference: Yuhang Yang,

Jinhong Deng,

Wen Li,

Lixin Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Yuhang and Deng, Jinhong and Li, Wen and Duan, Lixin},
  title     = {{ResCLIP}: Residual Attention for Training-free Dense Vision-language Inference},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29968--29978},
}
GPS as a Control Signal for Image Generation: Chao Feng,

Ziyang Chen,

Aleksander Holynski,

Alexei A. Efros,

Andrew Owens; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Chao and Chen, Ziyang and Holynski, Aleksander and Efros, Alexei A. and Owens, Andrew},
  title     = {{GPS} as a Control Signal for Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2766--2778},
}
CPath-Omni: A Unified Multimodal Foundation Model for Patch and Whole Slide Image Analysis in Computational Pathology: Yuxuan Sun,

Yixuan Si,

Chenglu Zhu,

Xuan Gong,

Kai Zhang,

Pingyi Chen,

Ye Zhang,

Zhongyi Shui,

Tao Lin,

Lin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Yuxuan and Si, Yixuan and Zhu, Chenglu and Gong, Xuan and Zhang, Kai and Chen, Pingyi and Zhang, Ye and Shui, Zhongyi and Lin, Tao and Yang, Lin},
  title     = {{CPath-Omni}: A Unified Multimodal Foundation Model for Patch and Whole Slide Image Analysis in Computational Pathology},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10360--10371},
}
OPTICAL: Leveraging Optimal Transport for Contribution Allocation in Dataset Distillation: Xiao Cui,

Yulei Qin,

Wengang Zhou,

Hongsheng Li,

Houqiang Li; [pdf]
[bibtex]
@InProceedings{Cui_2025_CVPR,
  author    = {Cui, Xiao and Qin, Yulei and Zhou, Wengang and Li, Hongsheng and Li, Houqiang},
  title     = {{OPTICAL}: Leveraging Optimal Transport for Contribution Allocation in Dataset Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15245--15254},
}
MAGiC-SLAM: Multi-Agent Gaussian Globally Consistent SLAM: Vladimir Yugay,

Theo Gevers,

Martin R. Oswald; [pdf]
[bibtex]
@InProceedings{Yugay_2025_CVPR,
  author    = {Yugay, Vladimir and Gevers, Theo and Oswald, Martin R.},
  title     = {{MAGiC-SLAM}: Multi-Agent {Gaussian} Globally Consistent {SLAM}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6741--6750},
}
Dispider: Enabling Video LLMs with Active Real-Time Interaction via Disentangled Perception, Decision, and Reaction: Rui Qian,

Shuangrui Ding,

Xiaoyi Dong,

Pan Zhang,

Yuhang Zang,

Yuhang Cao,

Dahua Lin,

Jiaqi Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Qian_2025_CVPR,
  author    = {Qian, Rui and Ding, Shuangrui and Dong, Xiaoyi and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and Lin, Dahua and Wang, Jiaqi},
  title     = {Dispider: Enabling Video {LLMs} with Active Real-Time Interaction via Disentangled Perception, Decision, and Reaction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24045--24055},
}
NTClick: Achieving Precise Interactive Segmentation With Noise-tolerant Clicks: Chenyi Zhang,

Ting Liu,

Xiaochao Qu,

Luoqi Liu,

Yao Zhao,

Yunchao Wei; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Chenyi and Liu, Ting and Qu, Xiaochao and Liu, Luoqi and Zhao, Yao and Wei, Yunchao},
  title     = {{NTClick}: Achieving Precise Interactive Segmentation With Noise-tolerant Clicks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8921--8930},
}
Show and Segment: Universal Medical Image Segmentation via In-Context Learning: Yunhe Gao,

Di Liu,

Zhuowei Li,

Yunsheng Li,

Dongdong Chen,

Mu Zhou,

Dimitris N. Metaxas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Yunhe and Liu, Di and Li, Zhuowei and Li, Yunsheng and Chen, Dongdong and Zhou, Mu and Metaxas, Dimitris N.},
  title     = {Show and Segment: Universal Medical Image Segmentation via In-Context Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20830--20840},
}
MVGenMaster: Scaling Multi-View Generation from Any Image via 3D Priors Enhanced Diffusion Model: Chenjie Cao,

Chaohui Yu,

Shang Liu,

Fan Wang,

Xiangyang Xue,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Chenjie and Yu, Chaohui and Liu, Shang and Wang, Fan and Xue, Xiangyang and Fu, Yanwei},
  title     = {{MVGenMaster}: Scaling Multi-View Generation from Any Image via {3D} Priors Enhanced Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6045--6056},
}
CADCrafter: Generating Computer-Aided Design Models from Unconstrained Images: Cheng Chen,

Jiacheng Wei,

Tianrun Chen,

Chi Zhang,

Xiaofeng Yang,

Shangzhan Zhang,

Bingchen Yang,

Chuan-Sheng Foo,

Guosheng Lin,

Qixing Huang,

Fayao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Cheng and Wei, Jiacheng and Chen, Tianrun and Zhang, Chi and Yang, Xiaofeng and Zhang, Shangzhan and Yang, Bingchen and Foo, Chuan-Sheng and Lin, Guosheng and Huang, Qixing and Liu, Fayao},
  title     = {{CADCrafter}: Generating Computer-Aided Design Models from Unconstrained Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11073--11082},
}
Bayesian Test-Time Adaptation for Vision-Language Models: Lihua Zhou,

Mao Ye,

Shuaifeng Li,

Nianxin Li,

Xiatian Zhu,

Lei Deng,

Hongbin Liu,

Zhen Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Lihua and Ye, Mao and Li, Shuaifeng and Li, Nianxin and Zhu, Xiatian and Deng, Lei and Liu, Hongbin and Lei, Zhen},
  title     = {{Bayesian} Test-Time Adaptation for Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29999--30009},
}
Generative Multiview Relighting for 3D Reconstruction under Extreme Illumination Variation: Hadi Alzayer,

Philipp Henzler,

Jonathan T. Barron,

Jia-Bin Huang,

Pratul P. Srinivasan,

Dor Verbin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alzayer_2025_CVPR,
  author    = {Alzayer, Hadi and Henzler, Philipp and Barron, Jonathan T. and Huang, Jia-Bin and Srinivasan, Pratul P. and Verbin, Dor},
  title     = {Generative Multiview Relighting for {3D} Reconstruction under Extreme Illumination Variation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10933--10942},
}
Causal Composition Diffusion Model for Closed-loop Traffic Generation: Haohong Lin,

Xin Huang,

Tung Phan,

David Hayden,

Huan Zhang,

Ding Zhao,

Siddhartha Srinivasa,

Eric Wolff,

Hongge Chen; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Haohong and Huang, Xin and Phan, Tung and Hayden, David and Zhang, Huan and Zhao, Ding and Srinivasa, Siddhartha and Wolff, Eric and Chen, Hongge},
  title     = {Causal Composition Diffusion Model for Closed-loop Traffic Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27542--27552},
}
Change3D: Revisiting Change Detection and Captioning from A Video Modeling Perspective: Duowang Zhu,

Xiaohu Huang,

Haiyan Huang,

Hao Zhou,

Zhenfeng Shao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Duowang and Huang, Xiaohu and Huang, Haiyan and Zhou, Hao and Shao, Zhenfeng},
  title     = {{Change3D}: Revisiting Change Detection and Captioning from A Video Modeling Perspective},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24011--24022},
}
DyMO: Training-Free Diffusion Model Alignment with Dynamic Multi-Objective Scheduling: Xin Xie,

Dong Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Xin and Gong, Dong},
  title     = {{DyMO}: Training-Free Diffusion Model Alignment with Dynamic Multi-Objective Scheduling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13220--13230},
}
HiMoR: Monocular Deformable Gaussian Reconstruction with Hierarchical Motion Representation: Yiming Liang,

Tianhan Xu,

Yuta Kikuchi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Yiming and Xu, Tianhan and Kikuchi, Yuta},
  title     = {{HiMoR}: Monocular Deformable {Gaussian} Reconstruction with Hierarchical Motion Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {886--895},
}
GENIUS: A Generative Framework for Universal Multimodal Search: Sungyeon Kim,

Xinliang Zhu,

Xiaofan Lin,

Muhammet Bastan,

Douglas Gray,

Suha Kwak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Sungyeon and Zhu, Xinliang and Lin, Xiaofan and Bastan, Muhammet and Gray, Douglas and Kwak, Suha},
  title     = {{GENIUS}: A Generative Framework for Universal Multimodal Search},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19659--19669},
}
Enhanced Visual-Semantic Interaction with Tailored Prompts for Pedestrian Attribute Recognition: Junyi Wu,

Yan Huang,

Min Gao,

Yuzhen Niu,

Yuzhong Chen,

Qiang Wu; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Junyi and Huang, Yan and Gao, Min and Niu, Yuzhen and Chen, Yuzhong and Wu, Qiang},
  title     = {Enhanced Visual-Semantic Interaction with Tailored Prompts for Pedestrian Attribute Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9570--9579},
}
SF3D: Stable Fast 3D Mesh Reconstruction with UV-unwrapping and Illumination Disentanglement: Mark Boss,

Zixuan Huang,

Aaryaman Vasishta,

Varun Jampani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Boss_2025_CVPR,
  author    = {Boss, Mark and Huang, Zixuan and Vasishta, Aaryaman and Jampani, Varun},
  title     = {{SF3D}: Stable Fast {3D} Mesh Reconstruction with {UV}-unwrapping and Illumination Disentanglement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16240--16250},
}
HSI-GPT: A General-Purpose Large Scene-Motion-Language Model for Human Scene Interaction: Yuan Wang,

Yali Li,

Xiang Li,

Shengjin Wang; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuan and Li, Yali and Li, Xiang and Wang, Shengjin},
  title     = {{HSI-GPT}: A General-Purpose Large Scene-Motion-Language Model for Human Scene Interaction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7147--7157}
}
Towards Precise Embodied Dialogue Localization via Causality Guided Diffusion: Haoyu Wang,

Le Wang,

Sanping Zhou,

Jingyi Tian,

Zheng Qin,

Yabing Wang,

Gang Hua,

Wei Tang; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Haoyu and Wang, Le and Zhou, Sanping and Tian, Jingyi and Qin, Zheng and Wang, Yabing and Hua, Gang and Tang, Wei},
  title     = {Towards Precise Embodied Dialogue Localization via Causality Guided Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13350--13360}
}
Vid2Avatar-Pro: Authentic Avatar from Videos in the Wild via Universal Prior: Chen Guo,

Junxuan Li,

Yash Kant,

Yaser Sheikh,

Shunsuke Saito,

Chen Cao; [pdf]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Chen and Li, Junxuan and Kant, Yash and Sheikh, Yaser and Saito, Shunsuke and Cao, Chen},
  title     = {{Vid2Avatar-Pro}: Authentic Avatar from Videos in the Wild via Universal Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5559--5570}
}
RoomPainter: View-Integrated Diffusion for Consistent Indoor Scene Texturing: Zhipeng Huang,

Wangbo Yu,

Xinhua Cheng,

Chengshu Zhao,

Yunyang Ge,

Mingyi Guo,

Li Yuan,

Yonghong Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zhipeng and Yu, Wangbo and Cheng, Xinhua and Zhao, Chengshu and Ge, Yunyang and Guo, Mingyi and Yuan, Li and Tian, Yonghong},
  title     = {{RoomPainter}: View-Integrated Diffusion for Consistent Indoor Scene Texturing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {574--584}
}
Attribute-formed Class-specific Concept Space: Endowing Language Bottleneck Model with Better Interpretability and Scalability: Jianyang Zhang,

Qianli Luo,

Guowu Yang,

Wenjing Yang,

Weide Liu,

Guosheng Lin,

Fengmao Lv; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jianyang and Luo, Qianli and Yang, Guowu and Yang, Wenjing and Liu, Weide and Lin, Guosheng and Lv, Fengmao},
  title     = {Attribute-formed Class-specific Concept Space: Endowing Language Bottleneck Model with Better Interpretability and Scalability},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30291--30300}
}
Customized Condition Controllable Generation for Video Soundtrack: Fan Qi,

Kunsheng Ma,

Changsheng Xu; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2025_CVPR,
  author    = {Qi, Fan and Ma, Kunsheng and Xu, Changsheng},
  title     = {Customized Condition Controllable Generation for Video Soundtrack},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23914--23924}
}
ProjAttacker: A Configurable Physical Adversarial Attack for Face Recognition via Projector: Yuanwei Liu,

Hui Wei,

Chengyu Jia,

Ruqi Xiao,

Weijian Ruan,

Xingxing Wei,

Joey Tianyi Zhou,

Zheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yuanwei and Wei, Hui and Jia, Chengyu and Xiao, Ruqi and Ruan, Weijian and Wei, Xingxing and Zhou, Joey Tianyi and Wang, Zheng},
  title     = {{ProjAttacker}: A Configurable Physical Adversarial Attack for Face Recognition via Projector},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21248--21257}
}
EfficientViM: Efficient Vision Mamba with Hidden State Mixer based State Space Duality: Sanghyeok Lee,

Joonmyung Choi,

Hyunwoo J. Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Sanghyeok and Choi, Joonmyung and Kim, Hyunwoo J.},
  title     = {{EfficientViM}: Efficient Vision {Mamba} with Hidden State Mixer based State Space Duality},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14923--14933}
}
A4A: Adapter for Adapter Transfer via All-for-All Mapping for Cross-Architecture Models: Keyu Tu,

Mengqi Huang,

Zhuowei Chen,

Zhendong Mao; [pdf] [supp]
[bibtex]
@InProceedings{Tu_2025_CVPR,
  author    = {Tu, Keyu and Huang, Mengqi and Chen, Zhuowei and Mao, Zhendong},
  title     = {{A4A}: Adapter for Adapter Transfer via All-for-All Mapping for Cross-Architecture Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18476--18485}
}
ViCaS: A Dataset for Combining Holistic and Pixel-level Video Understanding using Captions with Grounded Segmentation: Ali Athar,

Xueqing Deng,

Liang-Chieh Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Athar_2025_CVPR,
  author    = {Athar, Ali and Deng, Xueqing and Chen, Liang-Chieh},
  title     = {{ViCaS}: A Dataset for Combining Holistic and Pixel-level Video Understanding using Captions with Grounded Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19023--19035}
}
A Universal Scale-Adaptive Deformable Transformer for Image Restoration across Diverse Artifacts: Xuyi He,

Yuhui Quan,

Ruotao Xu,

Hui Ji; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Xuyi and Quan, Yuhui and Xu, Ruotao and Ji, Hui},
  title     = {A Universal Scale-Adaptive Deformable Transformer for Image Restoration across Diverse Artifacts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12731--12741}
}
WISE: A Framework for Gigapixel Whole-Slide-Image Lossless Compression: Yu Mao,

Jun Wang,

Nan Guan,

Chun Jason Xue; [pdf] [arXiv]
[bibtex]
@InProceedings{Mao_2025_CVPR,
  author    = {Mao, Yu and Wang, Jun and Guan, Nan and Xue, Chun Jason},
  title     = {{WISE}: A Framework for Gigapixel Whole-Slide-Image Lossless Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29342--29351}
}
Gromov-Wasserstein Problem with Cyclic Symmetry: Shoichiro Takeda,

Yasunori Akagi; [pdf] [supp]
[bibtex]
@InProceedings{Takeda_2025_CVPR,
  author    = {Takeda, Shoichiro and Akagi, Yasunori},
  title     = {{Gromov-Wasserstein} Problem with Cyclic Symmetry},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21011--21020}
}
IRIS: Inverse Rendering of Indoor Scenes from Low Dynamic Range Images: Chih-Hao Lin,

Jia-Bin Huang,

Zhengqin Li,

Zhao Dong,

Christian Richardt,

Tuotuo Li,

Michael Zollhöfer,

Johannes Kopf,

Shenlong Wang,

Changil Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Chih-Hao and Huang, Jia-Bin and Li, Zhengqin and Dong, Zhao and Richardt, Christian and Li, Tuotuo and Zollh{\"o}fer, Michael and Kopf, Johannes and Wang, Shenlong and Kim, Changil},
  title     = {{IRIS}: Inverse Rendering of Indoor Scenes from Low Dynamic Range Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {465--474}
}
SimAvatar: Simulation-Ready Avatars with Layered Hair and Clothing: Xueting Li,

Ye Yuan,

Shalini De Mello,

Gilles Daviet,

Jonathan Leaf,

Miles Macklin,

Jan Kautz,

Umar Iqbal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xueting and Yuan, Ye and De Mello, Shalini and Daviet, Gilles and Leaf, Jonathan and Macklin, Miles and Kautz, Jan and Iqbal, Umar},
  title     = {{SimAvatar}: Simulation-Ready Avatars with Layered Hair and Clothing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26320--26330}
}
Test-Time Backdoor Detection for Object Detection Models: Hangtao Zhang,

Yichen Wang,

Shihui Yan,

Chenyu Zhu,

Ziqi Zhou,

Linshan Hou,

Shengshan Hu,

Minghui Li,

Yanjun Zhang,

Leo Yu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Hangtao and Wang, Yichen and Yan, Shihui and Zhu, Chenyu and Zhou, Ziqi and Hou, Linshan and Hu, Shengshan and Li, Minghui and Zhang, Yanjun and Zhang, Leo Yu},
  title     = {Test-Time Backdoor Detection for Object Detection Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24377--24386}
}
Towards Precise Scaling Laws for Video Diffusion Transformers: Yuanyang Yin,

Yaqi Zhao,

Mingwu Zheng,

Ke Lin,

Jiarong Ou,

Rui Chen,

Victor Shea-Jay Huang,

Jiahao Wang,

Xin Tao,

Pengfei Wan,

Di Zhang,

Baoqun Yin,

Wentao Zhang,

Kun Gai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Yuanyang and Zhao, Yaqi and Zheng, Mingwu and Lin, Ke and Ou, Jiarong and Chen, Rui and Huang, Victor Shea-Jay and Wang, Jiahao and Tao, Xin and Wan, Pengfei and Zhang, Di and Yin, Baoqun and Zhang, Wentao and Gai, Kun},
  title     = {Towards Precise Scaling Laws for Video Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18155--18165}
}
RoGSplat: Learning Robust Generalizable Human Gaussian Splatting from Sparse Multi-View Images: Junjin Xiao,

Qing Zhang,

Yonewei Nie,

Lei Zhu,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Junjin and Zhang, Qing and Nie, Yonewei and Zhu, Lei and Zheng, Wei-Shi},
  title     = {{RoGSplat}: Learning Robust Generalizable Human {Gaussian} Splatting from Sparse Multi-View Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5980--5990}
}
SDBF: Steep-Decision-Boundary Fingerprinting for Hard-Label Tampering Detection of DNN Models: Xiaofan Bai,

Shixin Li,

Xiaojing Ma,

Bin Benjamin Zhu,

Dongmei Zhang,

Linchen Yu; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2025_CVPR,
  author    = {Bai, Xiaofan and Li, Shixin and Ma, Xiaojing and Zhu, Bin Benjamin and Zhang, Dongmei and Yu, Linchen},
  title     = {{SDBF}: Steep-Decision-Boundary Fingerprinting for Hard-Label Tampering Detection of {DNN} Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29278--29287}
}
EnliveningGS: Active Locomotion of 3DGS: Siyuan Shen,

Tianjia Shao,

Kun Zhou,

Chenfanfu Jiang,

Yin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Siyuan and Shao, Tianjia and Zhou, Kun and Jiang, Chenfanfu and Yang, Yin},
  title     = {{EnliveningGS}: Active Locomotion of {3DGS}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {896--905}
}
SPMTrack: Spatio-Temporal Parameter-Efficient Fine-Tuning with Mixture of Experts for Scalable Visual Tracking: Wenrui Cai,

Qingjie Liu,

Yunhong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Wenrui and Liu, Qingjie and Wang, Yunhong},
  title     = {{SPMTrack}: Spatio-Temporal Parameter-Efficient Fine-Tuning with Mixture of Experts for Scalable Visual Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16871--16881}
}
AnyCam: Learning to Recover Camera Poses and Intrinsics from Casual Videos: Felix Wimbauer,

Weirong Chen,

Dominik Muhle,

Christian Rupprecht,

Daniel Cremers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wimbauer_2025_CVPR,
  author    = {Wimbauer, Felix and Chen, Weirong and Muhle, Dominik and Rupprecht, Christian and Cremers, Daniel},
  title     = {{AnyCam}: Learning to Recover Camera Poses and Intrinsics from Casual Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16717--16727}
}
Knowledge-Aligned Counterfactual-Enhancement Diffusion Perception for Unsupervised Cross-Domain Visual Emotion Recognition: Wen Yin,

Yong Wang,

Guiduo Duan,

Dongyang Zhang,

Xin Hu,

Yuan-Fang Li,

Tao He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Wen and Wang, Yong and Duan, Guiduo and Zhang, Dongyang and Hu, Xin and Li, Yuan-Fang and He, Tao},
  title     = {Knowledge-Aligned Counterfactual-Enhancement Diffusion Perception for Unsupervised Cross-Domain Visual Emotion Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3888--3898}
}
Distilling Multi-modal Large Language Models for Autonomous Driving: Deepti Hegde,

Rajeev Yasarla,

Hong Cai,

Shizhong Han,

Apratim Bhattacharyya,

Shweta Mahajan,

Litian Liu,

Risheek Garrepalli,

Vishal M. Patel,

Fatih Porikli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hegde_2025_CVPR,
  author    = {Hegde, Deepti and Yasarla, Rajeev and Cai, Hong and Han, Shizhong and Bhattacharyya, Apratim and Mahajan, Shweta and Liu, Litian and Garrepalli, Risheek and Patel, Vishal M. and Porikli, Fatih},
  title     = {Distilling Multi-modal Large Language Models for Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27575--27585}
}
Pixel-aligned RGB-NIR Stereo Imaging and Dataset for Robot Vision: Jinnyeong Kim,

Seung-Hwan Baek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Jinnyeong and Baek, Seung-Hwan},
  title     = {Pixel-aligned {RGB-NIR} Stereo Imaging and Dataset for Robot Vision},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11482--11492}
}
Can Machines Understand Composition? Dataset and Benchmark for Photographic Image Composition Embedding and Understanding: Zhaoran Zhao,

Peng Lu,

Anran Zhang,

Peipei Li,

Xia Li,

Xuannan Liu,

Yang Hu,

Shiyi Chen,

Liwei Wang,

Wenhao Guo; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Zhaoran and Lu, Peng and Zhang, Anran and Li, Peipei and Li, Xia and Liu, Xuannan and Hu, Yang and Chen, Shiyi and Wang, Liwei and Guo, Wenhao},
  title     = {Can Machines Understand Composition? Dataset and Benchmark for Photographic Image Composition Embedding and Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14411--14421}
}
LPOSS: Label Propagation Over Patches and Pixels for Open-vocabulary Semantic Segmentation: Vladan Stojnić,

Yannis Kalantidis,

Jiří Matas,

Giorgos Tolias; [pdf] [supp]
[bibtex]
@InProceedings{Stojnic_2025_CVPR,
  author    = {Stojni{\'c}, Vladan and Kalantidis, Yannis and Matas, Ji{\v{r}}{\'\i} and Tolias, Giorgos},
  title     = {{LPOSS}: Label Propagation Over Patches and Pixels for Open-vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9794--9803}
}
Towards Efficient Foundation Model for Zero-shot Amodal Segmentation: Zhaochen Liu,

Limeng Qiao,

Xiangxiang Chu,

Lin Ma,

Tingting Jiang; [pdf]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhaochen and Qiao, Limeng and Chu, Xiangxiang and Ma, Lin and Jiang, Tingting},
  title     = {Towards Efficient Foundation Model for Zero-shot Amodal Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20254--20264}
}
PhysGen3D: Crafting a Miniature Interactive World from a Single Image: Boyuan Chen,

Hanxiao Jiang,

Shaowei Liu,

Saurabh Gupta,

Yunzhu Li,

Hao Zhao,

Shenlong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Boyuan and Jiang, Hanxiao and Liu, Shaowei and Gupta, Saurabh and Li, Yunzhu and Zhao, Hao and Wang, Shenlong},
  title     = {{PhysGen3D}: Crafting a Miniature Interactive World from a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6178--6189}
}
Docopilot: Improving Multimodal Models for Document-Level Understanding: Yuchen Duan,

Zhe Chen,

Yusong Hu,

Weiyun Wang,

Shenglong Ye,

Botian Shi,

Lewei Lu,

Qibin Hou,

Tong Lu,

Hongsheng Li,

Jifeng Dai,

Wenhai Wang; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2025_CVPR,
  author    = {Duan, Yuchen and Chen, Zhe and Hu, Yusong and Wang, Weiyun and Ye, Shenglong and Shi, Botian and Lu, Lewei and Hou, Qibin and Lu, Tong and Li, Hongsheng and Dai, Jifeng and Wang, Wenhai},
  title     = {Docopilot: Improving Multimodal Models for Document-Level Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4026--4037}
}
Scaling Properties of Diffusion Models For Perceptual Tasks: Rahul Ravishankar,

Zeeshan Patel,

Jathushan Rajasegaran,

Jitendra Malik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ravishankar_2025_CVPR,
  author    = {Ravishankar, Rahul and Patel, Zeeshan and Rajasegaran, Jathushan and Malik, Jitendra},
  title     = {Scaling Properties of Diffusion Models For Perceptual Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12945--12954}
}
HD-EPIC: A Highly-Detailed Egocentric Video Dataset: Toby Perrett,

Ahmad Darkhalil,

Saptarshi Sinha,

Omar Emara,

Sam Pollard,

Kranti Kumar Parida,

Kaiting Liu,

Prajwal Gatti,

Siddhant Bansal,

Kevin Flanagan,

Jacob Chalk,

Zhifan Zhu,

Rhodri Guerrier,

Fahd Abdelazim,

Bin Zhu,

Davide Moltisanti,

Michael Wray,

Hazel Doughty,

Dima Damen; [pdf] [supp]
[bibtex]
@InProceedings{Perrett_2025_CVPR,
  author    = {Perrett, Toby and Darkhalil, Ahmad and Sinha, Saptarshi and Emara, Omar and Pollard, Sam and Parida, Kranti Kumar and Liu, Kaiting and Gatti, Prajwal and Bansal, Siddhant and Flanagan, Kevin and Chalk, Jacob and Zhu, Zhifan and Guerrier, Rhodri and Abdelazim, Fahd and Zhu, Bin and Moltisanti, Davide and Wray, Michael and Doughty, Hazel and Damen, Dima},
  title     = {{HD-EPIC}: A Highly-Detailed Egocentric Video Dataset},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23901--23913}
}
Exact: Exploring Space-Time Perceptive Clues for Weakly Supervised Satellite Image Time Series Semantic Segmentation: Hao Zhu,

Yan Zhu,

Jiayu Xiao,

Tianxiang Xiao,

Yike Ma,

Yucheng Zhang,

Feng Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Hao and Zhu, Yan and Xiao, Jiayu and Xiao, Tianxiang and Ma, Yike and Zhang, Yucheng and Dai, Feng},
  title     = {Exact: Exploring Space-Time Perceptive Clues for Weakly Supervised Satellite Image Time Series Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14036--14045}
}
Advancing Myopia To Holism: Fully Contrastive Language-Image Pre-training: Haicheng Wang,

Chen Ju,

Weixiong Lin,

Shuai Xiao,

Mengting Chen,

Yixuan Huang,

Chang Liu,

Mingshuai Yao,

Jinsong Lan,

Ying Chen,

Qingwen Liu,

Yanfeng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Haicheng and Ju, Chen and Lin, Weixiong and Xiao, Shuai and Chen, Mengting and Huang, Yixuan and Liu, Chang and Yao, Mingshuai and Lan, Jinsong and Chen, Ying and Liu, Qingwen and Wang, Yanfeng},
  title     = {Advancing Myopia To Holism: Fully Contrastive Language-Image Pre-training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29791--29802}
}
PolarFree: Polarization-based Reflection-Free Imaging: Mingde Yao,

Menglu Wang,

King-Man Tam,

Lingen Li,

Tianfan Xue,

Jinwei Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2025_CVPR,
  author    = {Yao, Mingde and Wang, Menglu and Tam, King-Man and Li, Lingen and Xue, Tianfan and Gu, Jinwei},
  title     = {{PolarFree}: Polarization-based Reflection-Free Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10890--10899}
}
H-MoRe: Learning Human-centric Motion Representation for Action Analysis: Zhanbo Huang,

Xiaoming Liu,

Yu Kong; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zhanbo and Liu, Xiaoming and Kong, Yu},
  title     = {{H-MoRe}: Learning Human-centric Motion Representation for Action Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22702--22713}
}
Hierarchical Compact Clustering Attention (COCA) for Unsupervised Object-Centric Learning: Can Kucuksozen,

Yucel Yemez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kucuksozen_2025_CVPR,
  author    = {Kucuksozen, Can and Yemez, Yucel},
  title     = {Hierarchical Compact Clustering Attention ({COCA}) for Unsupervised Object-Centric Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25388--25398}
}
Effortless Active Labeling for Long-Term Test-Time Adaptation: Guowei Wang,

Changxing Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Guowei and Ding, Changxing},
  title     = {Effortless Active Labeling for Long-Term Test-Time Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25633--25642}
}
Leveraging Temporal Cues for Semi-Supervised Multi-View 3D Object Detection: Jinhyung Park,

Navyata Sanghvi,

Hiroki Adachi,

Yoshihisa Shibata,

Shawn Hunt,

Shinya Tanaka,

Hironobu Fujiyoshi,

Kris Kitani; [pdf] [supp]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Jinhyung and Sanghvi, Navyata and Adachi, Hiroki and Shibata, Yoshihisa and Hunt, Shawn and Tanaka, Shinya and Fujiyoshi, Hironobu and Kitani, Kris},
  title     = {Leveraging Temporal Cues for Semi-Supervised Multi-View {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27401--27412}
}
Self-supervised ControlNet with Spatio-Temporal Mamba for Real-world Video Super-resolution: Shijun Shi,

Jing Xu,

Lijing Lu,

Zhihang Li,

Kai Hu; [pdf] [arXiv]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Shijun and Xu, Jing and Lu, Lijing and Li, Zhihang and Hu, Kai},
  title     = {Self-supervised {ControlNet} with Spatio-Temporal {Mamba} for Real-world Video Super-resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7385--7395}
}
LATTE-MV: Learning to Anticipate Table Tennis Hits from Monocular Videos: Daniel Etaat,

Dvij Kalaria,

Nima Rahmanian,

S. Shankar Sastry; [pdf] [supp]
[bibtex]
@InProceedings{Etaat_2025_CVPR,
  author    = {Etaat, Daniel and Kalaria, Dvij and Rahmanian, Nima and Sastry, S. Shankar},
  title     = {{LATTE-MV}: Learning to Anticipate Table Tennis Hits from Monocular Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7115--7124}
}
Logits DeConfusion with CLIP for Few-Shot Learning: Shuo Li,

Fang Liu,

Zehua Hao,

Xinyi Wang,

Lingling Li,

Xu Liu,

Puhua Chen,

Wenping Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Shuo and Liu, Fang and Hao, Zehua and Wang, Xinyi and Li, Lingling and Liu, Xu and Chen, Puhua and Ma, Wenping},
  title     = {Logits {DeConfusion} with {CLIP} for Few-Shot Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25411--25421}
}
Distilling Spatially-Heterogeneous Distortion Perception for Blind Image Quality Assessment: Xudong Li,

Wenjie Nie,

Yan Zhang,

Runze Hu,

Ke Li,

Xiawu Zheng,

Liujuan Cao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xudong and Nie, Wenjie and Zhang, Yan and Hu, Runze and Li, Ke and Zheng, Xiawu and Cao, Liujuan},
  title     = {Distilling Spatially-Heterogeneous Distortion Perception for Blind Image Quality Assessment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2344--2354}
}
Pay Attention to the Foreground in Object-Centric Learning: Pinzhuo Tian,

Shengjie Yang,

Hang Yu,

Alex Kot; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Pinzhuo and Yang, Shengjie and Yu, Hang and Kot, Alex},
  title     = {Pay Attention to the Foreground in Object-Centric Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30281--30290}
}
2DMamba: Efficient State Space Model for Image Representation with Applications on Giga-Pixel Whole Slide Image Classification: Jingwei Zhang,

Anh Tien Nguyen,

Xi Han,

Vincent Quoc-Huy Trinh,

Hong Qin,

Dimitris Samaras,

Mahdi S. Hosseini; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jingwei and Nguyen, Anh Tien and Han, Xi and Trinh, Vincent Quoc-Huy and Qin, Hong and Samaras, Dimitris and Hosseini, Mahdi S.},
  title     = {{2DMamba}: Efficient State Space Model for Image Representation with Applications on Giga-Pixel Whole Slide Image Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3583--3592}
}
Unboxed: Geometrically and Temporally Consistent Video Outpainting: Zhongrui Yu,

Martina Megaro-Boldini,

Robert W. Sumner,

Abdelaziz Djelouah; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Zhongrui and Megaro-Boldini, Martina and Sumner, Robert W. and Djelouah, Abdelaziz},
  title     = {Unboxed: Geometrically and Temporally Consistent Video Outpainting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7309--7319}
}
K-Sort Arena: Efficient and Reliable Benchmarking for Generative Models via K-wise Human Preferences: Zhikai Li,

Xuewen Liu,

Dongrong Joe Fu,

Jianquan Li,

Qingyi Gu,

Kurt Keutzer,

Zhen Dong; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zhikai and Liu, Xuewen and Fu, Dongrong Joe and Li, Jianquan and Gu, Qingyi and Keutzer, Kurt and Dong, Zhen},
  title     = {{K-Sort Arena}: Efficient and Reliable Benchmarking for Generative Models via {K}-wise Human Preferences},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9131--9141}
}
Seeking Consistent Flat Minima for Better Domain Generalization via Refining Loss Landscapes: Aodi Li,

Liansheng Zhuang,

Xiao Long,

Minghong Yao,

Shafei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Aodi and Zhuang, Liansheng and Long, Xiao and Yao, Minghong and Wang, Shafei},
  title     = {Seeking Consistent Flat Minima for Better Domain Generalization via Refining Loss Landscapes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15349--15359}
}
MultimodalStudio: A Heterogeneous Sensor Dataset and Framework for Neural Rendering across Multiple Imaging Modalities: Federico Lincetto,

Gianluca Agresti,

Mattia Rossi,

Pietro Zanuttigh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lincetto_2025_CVPR,
  author    = {Lincetto, Federico and Agresti, Gianluca and Rossi, Mattia and Zanuttigh, Pietro},
  title     = {{MultimodalStudio}: A Heterogeneous Sensor Dataset and Framework for Neural Rendering across Multiple Imaging Modalities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10964--10973}
}
Dense-SfM: Structure from Motion with Dense Consistent Matching: JongMin Lee,

Sungjoo Yoo; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, JongMin and Yoo, Sungjoo},
  title     = {{Dense-SfM}: Structure from Motion with Dense Consistent Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6404--6414}
}
FluidNexus: 3D Fluid Reconstruction and Prediction from a Single Video: Yue Gao,

Hong-Xing Yu,

Bo Zhu,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Yue and Yu, Hong-Xing and Zhu, Bo and Wu, Jiajun},
  title     = {{FluidNexus}: {3D} Fluid Reconstruction and Prediction from a Single Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26091--26101}
}
MuTri: Multi-view Tri-alignment for OCT to OCTA 3D Image Translation: Zhuangzhuang Chen,

Hualiang Wang,

Chubin Ou,

Xiaomeng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zhuangzhuang and Wang, Hualiang and Ou, Chubin and Li, Xiaomeng},
  title     = {{MuTri}: Multi-view Tri-alignment for {OCT} to {OCTA} {3D} Image Translation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20885--20894}
}
Sketchy Bounding-box Supervision for 3D Instance Segmentation: Qian Deng,

Le Hui,

Jin Xie,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Qian and Hui, Le and Xie, Jin and Yang, Jian},
  title     = {Sketchy Bounding-box Supervision for {3D} Instance Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8879--8888}
}
Image Quality Assessment: Investigating Causal Perceptual Effects with Abductive Counterfactual Inference: Wenhao Shen,

Mingliang Zhou,

Yu Chen,

Xuekai Wei,

Yong Feng,

Huayan Pu,

Weijia Jia; [pdf] [arXiv]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Wenhao and Zhou, Mingliang and Chen, Yu and Wei, Xuekai and Feng, Yong and Pu, Huayan and Jia, Weijia},
  title     = {Image Quality Assessment: Investigating Causal Perceptual Effects with Abductive Counterfactual Inference},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17990--17999}
}
Pos3R: 6D Pose Estimation for Unseen Objects Made Easy: Weijian Deng,

Dylan Campbell,

Chunyi Sun,

Jiahao Zhang,

Shubham Kanitkar,

Matt E. Shaffer,

Stephen Gould; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Weijian and Campbell, Dylan and Sun, Chunyi and Zhang, Jiahao and Kanitkar, Shubham and Shaffer, Matt E. and Gould, Stephen},
  title     = {{Pos3R}: {6D} Pose Estimation for Unseen Objects Made Easy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16818--16828}
}
DeformCL: Learning Deformable Centerline Representation for Vessel Extraction in 3D Medical Image: Ziwei Zhao,

Zhixing Zhang,

Yuhang Liu,

Zhao Zhang,

Haojun Yu,

Dong Wang,

Liwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Ziwei and Zhang, Zhixing and Liu, Yuhang and Zhang, Zhao and Yu, Haojun and Wang, Dong and Wang, Liwei},
  title     = {{DeformCL}: Learning Deformable Centerline Representation for Vessel Extraction in {3D} Medical Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30896--30905},
}
StreetCrafter: Street View Synthesis with Controllable Video Diffusion Models: Yunzhi Yan,

Zhen Xu,

Haotong Lin,

Haian Jin,

Haoyu Guo,

Yida Wang,

Kun Zhan,

Xianpeng Lang,

Hujun Bao,

Xiaowei Zhou,

Sida Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Yunzhi and Xu, Zhen and Lin, Haotong and Jin, Haian and Guo, Haoyu and Wang, Yida and Zhan, Kun and Lang, Xianpeng and Bao, Hujun and Zhou, Xiaowei and Peng, Sida},
  title     = {{StreetCrafter}: Street View Synthesis with Controllable Video Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {822--832},
}
OCRT: Boosting Foundation Models in the Open World with Object-Concept-Relation Triad: Luyao Tang,

Yuxuan Yuan,

Chaoqi Chen,

Zeyu Zhang,

Yue Huang,

Kun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Luyao and Yuan, Yuxuan and Chen, Chaoqi and Zhang, Zeyu and Huang, Yue and Zhang, Kun},
  title     = {{OCRT}: Boosting Foundation Models in the Open World with Object-Concept-Relation Triad},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25422--25433},
}
SPARS3R: Semantic Prior Alignment and Regularization for Sparse 3D Reconstruction: Yutao Tang,

Yuxiang Guo,

Deming Li,

Cheng Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Yutao and Guo, Yuxiang and Li, Deming and Peng, Cheng},
  title     = {{SPARS3R}: Semantic Prior Alignment and Regularization for Sparse {3D} Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26810--26821},
}
VidBot: Learning Generalizable 3D Actions from In-the-Wild 2D Human Videos for Zero-Shot Robotic Manipulation: Hanzhi Chen,

Boyang Sun,

Anran Zhang,

Marc Pollefeys,

Stefan Leutenegger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Hanzhi and Sun, Boyang and Zhang, Anran and Pollefeys, Marc and Leutenegger, Stefan},
  title     = {{VidBot}: Learning Generalizable {3D} Actions from In-the-Wild {2D} Human Videos for Zero-Shot Robotic Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27661--27672},
}
Learning Person-Specific Animatable Face Models from In-the-Wild Images via a Shared Base Model: Yuxiang Mao,

Zhenfeng Fan,

ZhiJie Zhang,

Zhiheng Zhang,

Shihong Xia; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2025_CVPR,
  author    = {Mao, Yuxiang and Fan, Zhenfeng and Zhang, ZhiJie and Zhang, Zhiheng and Xia, Shihong},
  title     = {Learning Person-Specific Animatable Face Models from In-the-Wild Images via a Shared Base Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5602--5613},
}
TIMotion: Temporal and Interactive Framework for Efficient Human-Human Motion Generation: Yabiao Wang,

Shuo Wang,

Jiangning Zhang,

Ke Fan,

Jiafu Wu,

Zhucun Xue,

Yong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yabiao and Wang, Shuo and Zhang, Jiangning and Fan, Ke and Wu, Jiafu and Xue, Zhucun and Liu, Yong},
  title     = {{TIMotion}: Temporal and Interactive Framework for Efficient Human-Human Motion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7169--7178},
}
Which Viewpoint Shows it Best? Language for Weakly Supervising View Selection in Multi-view Instructional Videos: Sagnik Majumder,

Tushar Nagarajan,

Ziad Al-Halah,

Reina Pradhan,

Kristen Grauman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Majumder_2025_CVPR,
  author    = {Majumder, Sagnik and Nagarajan, Tushar and Al-Halah, Ziad and Pradhan, Reina and Grauman, Kristen},
  title     = {Which Viewpoint Shows it Best? Language for Weakly Supervising View Selection in Multi-view Instructional Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29016--29028},
}
RaCFormer: Towards High-Quality 3D Object Detection via Query-based Radar-Camera Fusion: Xiaomeng Chu,

Jiajun Deng,

Guoliang You,

Yifan Duan,

Houqiang Li,

Yanyong Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Chu_2025_CVPR,
  author    = {Chu, Xiaomeng and Deng, Jiajun and You, Guoliang and Duan, Yifan and Li, Houqiang and Zhang, Yanyong},
  title     = {{RaCFormer}: Towards High-Quality {3D} Object Detection via Query-based Radar-Camera Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17081--17091},
}
Hybrid Reciprocal Transformer with Triplet Feature Alignment for Scene Graph Generation: Jiawei Fu,

Tiantian Zhang,

Kai Chen,

Qi Dou; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Jiawei and Zhang, Tiantian and Chen, Kai and Dou, Qi},
  title     = {Hybrid Reciprocal Transformer with Triplet Feature Alignment for Scene Graph Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8953--8963},
}
Understanding Multi-Task Activities from Single-Task Videos: Yuhan Shen,

Ehsan Elhamifar; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Yuhan and Elhamifar, Ehsan},
  title     = {Understanding Multi-Task Activities from Single-Task Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19120--19131},
}
Co-Speech Gesture Video Generation with Implicit Motion-Audio Entanglement: Xinjie Li,

Ziyi Chen,

Xinlu Yu,

Iek-Heng Chu,

Peng Chang,

Jing Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xinjie and Chen, Ziyi and Yu, Xinlu and Chu, Iek-Heng and Chang, Peng and Xiao, Jing},
  title     = {Co-Speech Gesture Video Generation with Implicit Motion-Audio Entanglement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11384--11394},
}
TransPixeler: Advancing Text-to-Video Generation with Transparency: Luozhou Wang,

Yijun Li,

Zhifei Chen,

Jui-Hsien Wang,

Zhifei Zhang,

He Zhang,

Zhe Lin,

Ying-Cong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Luozhou and Li, Yijun and Chen, Zhifei and Wang, Jui-Hsien and Zhang, Zhifei and Zhang, He and Lin, Zhe and Chen, Ying-Cong},
  title     = {{TransPixeler}: Advancing Text-to-Video Generation with Transparency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18229--18239},
}
Adaptive Keyframe Sampling for Long Video Understanding: Xi Tang,

Jihao Qiu,

Lingxi Xie,

Yunjie Tian,

Jianbin Jiao,

Qixiang Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Xi and Qiu, Jihao and Xie, Lingxi and Tian, Yunjie and Jiao, Jianbin and Ye, Qixiang},
  title     = {Adaptive Keyframe Sampling for Long Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29118--29128},
}
What's in the Image? A Deep-Dive into the Vision of Vision Language Models: Omri Kaduri,

Shai Bagon,

Tali Dekel; [pdf] [supp]
[bibtex]
@InProceedings{Kaduri_2025_CVPR,
  author    = {Kaduri, Omri and Bagon, Shai and Dekel, Tali},
  title     = {What's in the Image? A Deep-Dive into the Vision of Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14549--14558},
}
Person De-reidentification: A Variation-guided Identity Shift Modeling: Yi-Xing Peng,

Yu-Ming Tang,

Kun-Yu Lin,

Qize Yang,

Jingke Meng,

Xihan Wei,

Wei-Shi Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Peng_2025_CVPR,
  author    = {Peng, Yi-Xing and Tang, Yu-Ming and Lin, Kun-Yu and Yang, Qize and Meng, Jingke and Wei, Xihan and Zheng, Wei-Shi},
  title     = {Person De-reidentification: A Variation-guided Identity Shift Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29331--29341},
}
FreeSim: Toward Free-viewpoint Camera Simulation in Driving Scenes: Lue Fan,

Hao Zhang,

Qitai Wang,

Hongsheng Li,

Zhaoxiang Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Lue and Zhang, Hao and Wang, Qitai and Li, Hongsheng and Zhang, Zhaoxiang},
  title     = {{FreeSim}: Toward Free-viewpoint Camera Simulation in Driving Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12004--12014},
}
Gradient Inversion Attacks on Parameter-Efficient Fine-Tuning: Hasin Us Sami,

Swapneel Sen,

Amit K. Roy-Chowdhury,

Srikanth V. Krishnamurthy,

Basak Guler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sami_2025_CVPR,
  author    = {Sami, Hasin Us and Sen, Swapneel and Roy-Chowdhury, Amit K. and Krishnamurthy, Srikanth V. and Guler, Basak},
  title     = {Gradient Inversion Attacks on Parameter-Efficient Fine-Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10224--10234},
}
UPME: An Unsupervised Peer Review Framework for Multimodal Large Language Model Evaluation: Qihui Zhang,

Munan Ning,

Zheyuan Liu,

Yue Huang,

Shuo Yang,

Yanbo Wang,

Jiayi Ye,

Xiao Chen,

Yibing Song,

Li Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Qihui and Ning, Munan and Liu, Zheyuan and Huang, Yue and Yang, Shuo and Wang, Yanbo and Ye, Jiayi and Chen, Xiao and Song, Yibing and Yuan, Li},
  title     = {{UPME}: An Unsupervised Peer Review Framework for Multimodal Large Language Model Evaluation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9165--9174},
}
DiGIT: Multi-Dilated Gated Encoder and Central-Adjacent Region Integrated Decoder for Temporal Action Detection Transformer: Ho-Joong Kim,

Yearang Lee,

Jung-Ho Hong,

Seong-Whan Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Ho-Joong and Lee, Yearang and Hong, Jung-Ho and Lee, Seong-Whan},
  title     = {{DiGIT}: Multi-Dilated Gated Encoder and Central-Adjacent Region Integrated Decoder for Temporal Action Detection Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24286--24296},
}
MBQ: Modality-Balanced Quantization for Large Vision-Language Models: Shiyao Li,

Yingchun Hu,

Xuefei Ning,

Xihui Liu,

Ke Hong,

Xiaotao Jia,

Xiuhong Li,

Yaqi Yan,

Pei Ran,

Guohao Dai,

Shengen Yan,

Huazhong Yang,

Yu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Shiyao and Hu, Yingchun and Ning, Xuefei and Liu, Xihui and Hong, Ke and Jia, Xiaotao and Li, Xiuhong and Yan, Yaqi and Ran, Pei and Dai, Guohao and Yan, Shengen and Yang, Huazhong and Wang, Yu},
  title     = {{MBQ}: Modality-Balanced Quantization for Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4167--4177},
}
Florence-VL: Enhancing Vision-Language Models with Generative Vision Encoder and Depth-Breadth Fusion: Jiuhai Chen,

Jianwei Yang,

Haiping Wu,

Dianqi Li,

Jianfeng Gao,

Tianyi Zhou,

Bin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jiuhai and Yang, Jianwei and Wu, Haiping and Li, Dianqi and Gao, Jianfeng and Zhou, Tianyi and Xiao, Bin},
  title     = {{Florence-VL}: Enhancing Vision-Language Models with Generative Vision Encoder and Depth-Breadth Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24928--24938},
}
VideoDPO: Omni-Preference Alignment for Video Diffusion Generation: Runtao Liu,

Haoyu Wu,

Ziqiang Zheng,

Chen Wei,

Yingqing He,

Renjie Pi,

Qifeng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Runtao and Wu, Haoyu and Zheng, Ziqiang and Wei, Chen and He, Yingqing and Pi, Renjie and Chen, Qifeng},
  title     = {{VideoDPO}: Omni-Preference Alignment for Video Diffusion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8009--8019},
}
Seq2Time: Sequential Knowledge Transfer for Video LLM Temporal Grounding: Andong Deng,

Zhongpai Gao,

Anwesa Choudhuri,

Benjamin Planche,

Meng Zheng,

Bin Wang,

Terrence Chen,

Chen Chen,

Ziyan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Andong and Gao, Zhongpai and Choudhuri, Anwesa and Planche, Benjamin and Zheng, Meng and Wang, Bin and Chen, Terrence and Chen, Chen and Wu, Ziyan},
  title     = {{Seq2Time}: Sequential Knowledge Transfer for Video {LLM} Temporal Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13766--13775},
}
GPVK-VL: Geometry-Preserving Virtual Keyframes for Visual Localization under Large Viewpoint Changes: Yunxuan Li,

Lei Fan,

Xiaoying Xing,

Jianxiong Zhou,

Ying Wu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yunxuan and Fan, Lei and Xing, Xiaoying and Zhou, Jianxiong and Wu, Ying},
  title     = {{GPVK-VL}: Geometry-Preserving Virtual Keyframes for Visual Localization under Large Viewpoint Changes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16728--16738},
}
Realistic Test-Time Adaptation of Vision-Language Models: Maxime Zanella,

Clément Fuchs,

Christophe De Vleeschouwer,

Ismail Ben Ayed; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zanella_2025_CVPR,
  author    = {Zanella, Maxime and Fuchs, Cl{\'e}ment and De Vleeschouwer, Christophe and Ben Ayed, Ismail},
  title     = {Realistic Test-Time Adaptation of Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25103--25112},
}
SelfSplat: Pose-Free and 3D Prior-Free Generalizable 3D Gaussian Splatting: Gyeongjin Kang,

Jisang Yoo,

Jihyeon Park,

Seungtae Nam,

Hyeonsoo Im,

Sangheon Shin,

Sangpil Kim,

Eunbyung Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_CVPR,
  author    = {Kang, Gyeongjin and Yoo, Jisang and Park, Jihyeon and Nam, Seungtae and Im, Hyeonsoo and Shin, Sangheon and Kim, Sangpil and Park, Eunbyung},
  title     = {{SelfSplat}: Pose-Free and {3D} Prior-Free Generalizable {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22012--22022},
}
Enhancing Virtual Try-On with Synthetic Pairs and Error-Aware Noise Scheduling: Nannan Li,

Kevin J. Shih,

Bryan A. Plummer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Nannan and Shih, Kevin J. and Plummer, Bryan A.},
  title     = {Enhancing Virtual Try-On with Synthetic Pairs and Error-Aware Noise Scheduling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21238--21247},
}
Exploring Simple Open-Vocabulary Semantic Segmentation: Zihang Lai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Zihang},
  title     = {Exploring Simple Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30221--30230},
}
MP-GUI: Modality Perception with MLLMs for GUI Understanding: Ziwei Wang,

Weizhi Chen,

Leyang Yang,

Sheng Zhou,

Shengchu Zhao,

Hanbei Zhan,

Jiongchao Jin,

Liangcheng Li,

Zirui Shao,

Jiajun Bu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Ziwei and Chen, Weizhi and Yang, Leyang and Zhou, Sheng and Zhao, Shengchu and Zhan, Hanbei and Jin, Jiongchao and Li, Liangcheng and Shao, Zirui and Bu, Jiajun},
  title     = {{MP-GUI}: Modality Perception with {MLLMs} for {GUI} Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29711--29721},
}
Associative Transformer: Yuwei Sun,

Hideya Ochiai,

Zhirong Wu,

Stephen Lin,

Ryota Kanai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Yuwei and Ochiai, Hideya and Wu, Zhirong and Lin, Stephen and Kanai, Ryota},
  title     = {Associative Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4518--4527},
}
Improving Adversarial Transferability on Vision Transformers via Forward Propagation Refinement: Yuchen Ren,

Zhengyu Zhao,

Chenhao Lin,

Bo Yang,

Lu Zhou,

Zhe Liu,

Chao Shen; [pdf] [arXiv]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Yuchen and Zhao, Zhengyu and Lin, Chenhao and Yang, Bo and Zhou, Lu and Liu, Zhe and Shen, Chao},
  title     = {Improving Adversarial Transferability on Vision Transformers via Forward Propagation Refinement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25071--25080},
}
Seeing What Matters: Empowering CLIP with Patch Generation-to-Selection: Gensheng Pei,

Tao Chen,

Yujia Wang,

Xinhao Cai,

Xiangbo Shu,

Tianfei Zhou,

Yazhou Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pei_2025_CVPR,
  author    = {Pei, Gensheng and Chen, Tao and Wang, Yujia and Cai, Xinhao and Shu, Xiangbo and Zhou, Tianfei and Yao, Yazhou},
  title     = {Seeing What Matters: Empowering {CLIP} with Patch Generation-to-Selection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24862--24872},
}
ChatGarment: Garment Estimation, Generation and Editing via Large Language Models: Siyuan Bian,

Chenghao Xu,

Yuliang Xiu,

Artur Grigorev,

Zhen Liu,

Cewu Lu,

Michael J. Black,

Yao Feng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bian_2025_CVPR,
  author    = {Bian, Siyuan and Xu, Chenghao and Xiu, Yuliang and Grigorev, Artur and Liu, Zhen and Lu, Cewu and Black, Michael J. and Feng, Yao},
  title     = {{ChatGarment}: Garment Estimation, Generation and Editing via Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2924--2934},
}
Enhancing Online Continual Learning with Plug-and-Play State Space Model and Class-Conditional Mixture of Discretization: Sihao Liu,

Yibo Yang,

Xiaojie Li,

David A. Clifton,

Bernard Ghanem; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Sihao and Yang, Yibo and Li, Xiaojie and Clifton, David A. and Ghanem, Bernard},
  title     = {Enhancing Online Continual Learning with Plug-and-Play State Space Model and Class-Conditional Mixture of Discretization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20502--20511},
}
RDD: Robust Feature Detector and Descriptor using Deformable Transformer: Gonglin Chen,

Tianwen Fu,

Haiwei Chen,

Wenbin Teng,

Hanyuan Xiao,

Yajie Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Gonglin and Fu, Tianwen and Chen, Haiwei and Teng, Wenbin and Xiao, Hanyuan and Zhao, Yajie},
  title     = {{RDD}: Robust Feature Detector and Descriptor using Deformable Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6394--6403},
}
Building Vision Models upon Heat Conduction: Zhaozhi Wang,

Yue Liu,

Yunjie Tian,

Yunfan Liu,

Yaowei Wang,

Qixiang Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhaozhi and Liu, Yue and Tian, Yunjie and Liu, Yunfan and Wang, Yaowei and Ye, Qixiang},
  title     = {Building Vision Models upon Heat Conduction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9707--9717},
}
GRAPHGPT-O: Synergistic Multimodal Comprehension and Generation on Graphs: Yi Fang,

Bowen Jin,

Jiacheng Shen,

Sirui Ding,

Qiaoyu Tan,

Jiawei Han; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Yi and Jin, Bowen and Shen, Jiacheng and Ding, Sirui and Tan, Qiaoyu and Han, Jiawei},
  title     = {{GRAPHGPT-O}: Synergistic Multimodal Comprehension and Generation on Graphs},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19467--19476},
}
Model Poisoning Attacks to Federated Learning via Multi-Round Consistency: Yueqi Xie,

Minghong Fang,

Neil Zhenqiang Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Yueqi and Fang, Minghong and Gong, Neil Zhenqiang},
  title     = {Model Poisoning Attacks to Federated Learning via Multi-Round Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15454--15463},
}
TaoAvatar: Real-Time Lifelike Full-Body Talking Avatars for Augmented Reality via 3D Gaussian Splatting: Jianchuan Chen,

Jingchuan Hu,

Gaige Wang,

Zhonghua Jiang,

Tiansong Zhou,

Zhiwen Chen,

Chengfei Lv; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jianchuan and Hu, Jingchuan and Wang, Gaige and Jiang, Zhonghua and Zhou, Tiansong and Chen, Zhiwen and Lv, Chengfei},
  title     = {{TaoAvatar}: Real-Time Lifelike Full-Body Talking Avatars for Augmented Reality via {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10723--10734},
}
Erasing Undesirable Influence in Diffusion Models: Jing Wu,

Trung Le,

Munawar Hayat,

Mehrtash Harandi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Jing and Le, Trung and Hayat, Munawar and Harandi, Mehrtash},
  title     = {Erasing Undesirable Influence in Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28263--28273},
}
LT3SD: Latent Trees for 3D Scene Diffusion: Quan Meng,

Lei Li,

Matthias Nießner,

Angela Dai; [pdf] [supp]
[bibtex]
@InProceedings{Meng_2025_CVPR,
  author    = {Meng, Quan and Li, Lei and Nie{\ss}ner, Matthias and Dai, Angela},
  title     = {{LT3SD}: Latent Trees for {3D} Scene Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {650--660},
}
Stacking Brick by Brick: Aligned Feature Isolation for Incremental Face Forgery Detection: Jikang Cheng,

Zhiyuan Yan,

Ying Zhang,

Li Hao,

Jiaxin Ai,

Qin Zou,

Chen Li,

Zhongyuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Jikang and Yan, Zhiyuan and Zhang, Ying and Hao, Li and Ai, Jiaxin and Zou, Qin and Li, Chen and Wang, Zhongyuan},
  title     = {Stacking Brick by Brick: Aligned Feature Isolation for Incremental Face Forgery Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13927--13936},
}
CO-SPY: Combining Semantic and Pixel Features to Detect Synthetic Images by AI: Siyuan Cheng,

Lingjuan Lyu,

Zhenting Wang,

Xiangyu Zhang,

Vikash Sehwag; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Siyuan and Lyu, Lingjuan and Wang, Zhenting and Zhang, Xiangyu and Sehwag, Vikash},
  title     = {{CO-SPY}: Combining Semantic and Pixel Features to Detect Synthetic Images by {AI}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13455--13465},
}
Closest Neighbors are Harmful for Lightweight Masked Auto-encoders: Jian Meng,

Ahmed Hasssan,

Li Yang,

Deliang Fan,

Jinwoo Shin,

Jae-sun Seo; [pdf] [supp]
[bibtex]
@InProceedings{Meng_2025_CVPR,
  author    = {Meng, Jian and Hasssan, Ahmed and Yang, Li and Fan, Deliang and Shin, Jinwoo and Seo, Jae-sun},
  title     = {Closest Neighbors are Harmful for Lightweight Masked Auto-encoders},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25230--25239},
}
CraftsMan3D: High-fidelity Mesh Generation with 3D Native Diffusion and Interactive Geometry Refiner: Weiyu Li,

Jiarui Liu,

Hongyu Yan,

Rui Chen,

Yixun Liang,

Xuelin Chen,

Ping Tan,

Xiaoxiao Long; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Weiyu and Liu, Jiarui and Yan, Hongyu and Chen, Rui and Liang, Yixun and Chen, Xuelin and Tan, Ping and Long, Xiaoxiao},
  title     = {{CraftsMan3D}: High-fidelity Mesh Generation with {3D} Native Diffusion and Interactive Geometry Refiner},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5307--5317},
}
Decouple-Then-Merge: Finetune Diffusion Models as Multi-Task Learning: Qianli Ma,

Xuefei Ning,

Dongrui Liu,

Li Niu,

Linfeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Qianli and Ning, Xuefei and Liu, Dongrui and Niu, Li and Zhang, Linfeng},
  title     = {Decouple-Then-Merge: Finetune Diffusion Models as Multi-Task Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23281--23291},
}
GIF: Generative Inspiration for Face Recognition at Scale: Saeed Ebrahimi,

Sahar Rahimi,

Ali Dabouei,

Srinjoy Das,

Jeremy M. Dawson,

Nasser M. Nasrabadi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ebrahimi_2025_CVPR,
  author    = {Ebrahimi, Saeed and Rahimi, Sahar and Dabouei, Ali and Das, Srinjoy and Dawson, Jeremy M. and Nasrabadi, Nasser M.},
  title     = {{GIF}: Generative Inspiration for Face Recognition at Scale},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3528--3539},
}
HELVIPAD: A Real-World Dataset for Omnidirectional Stereo Depth Estimation: Mehdi Zayene,

Jannik Endres,

Albias Havolli,

Charles Corbière,

Salim Cherkaoui,

Alexandre Kontouli,

Alexandre Alahi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zayene_2025_CVPR,
  author    = {Zayene, Mehdi and Endres, Jannik and Havolli, Albias and Corbi{\`e}re, Charles and Cherkaoui, Salim and Kontouli, Alexandre and Alahi, Alexandre},
  title     = {{HELVIPAD}: A Real-World Dataset for Omnidirectional Stereo Depth Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26975--26984},
}
GENMANIP: LLM-driven Simulation for Generalizable Instruction-Following Manipulation: Ning Gao,

Yilun Chen,

Shuai Yang,

Xinyi Chen,

Yang Tian,

Hao Li,

Haifeng Huang,

Hanqing Wang,

Tai Wang,

Jiangmiao Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Ning and Chen, Yilun and Yang, Shuai and Chen, Xinyi and Tian, Yang and Li, Hao and Huang, Haifeng and Wang, Hanqing and Wang, Tai and Pang, Jiangmiao},
  title     = {{GENMANIP}: {LLM}-driven Simulation for Generalizable Instruction-Following Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12187--12198},
}
SKDream: Controllable Multi-view and 3D Generation with Arbitrary Skeletons: Yuanyou Xu,

Zongxin Yang,

Yi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Yuanyou and Yang, Zongxin and Yang, Yi},
  title     = {{SKDream}: Controllable Multi-view and {3D} Generation with Arbitrary Skeletons},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {314--325},
}
Towards Enhanced Image Inpainting: Mitigating Unwanted Object Insertion and Preserving Color Consistency: Yikai Wang,

Chenjie Cao,

Junqiu Yu,

Ke Fan,

Xiangyang Xue,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yikai and Cao, Chenjie and Yu, Junqiu and Fan, Ke and Xue, Xiangyang and Fu, Yanwei},
  title     = {Towards Enhanced Image Inpainting: Mitigating Unwanted Object Insertion and Preserving Color Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23237--23248},
}
Optimus-2: Multimodal Minecraft Agent with Goal-Observation-Action Conditioned Policy: Zaijing Li,

Yuquan Xie,

Rui Shao,

Gongwei Chen,

Dongmei Jiang,

Liqiang Nie; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zaijing and Xie, Yuquan and Shao, Rui and Chen, Gongwei and Jiang, Dongmei and Nie, Liqiang},
  title     = {{Optimus-2}: Multimodal {Minecraft} Agent with Goal-Observation-Action Conditioned Policy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9039--9049},
}
Classic Video Denoising in a Machine Learning World: Robust, Fast, and Controllable: Xin Jin,

Simon Niklaus,

Zhoutong Zhang,

Zhihao Xia,

Chunle Guo,

Yuting Yang,

Jiawen Chen,

Chongyi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2025_CVPR,
  author    = {Jin, Xin and Niklaus, Simon and Zhang, Zhoutong and Xia, Zhihao and Guo, Chunle and Yang, Yuting and Chen, Jiawen and Li, Chongyi},
  title     = {Classic Video Denoising in a Machine Learning World: Robust, Fast, and Controllable},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2084--2093},
}
Practical Solutions to the Relative Pose of Three Calibrated Cameras: Charalambos Tzamos,

Viktor Kocur,

Yaqing Ding,

Daniel Barath,

Zuzana Berger Haladova,

Torsten Sattler,

Zuzana Kukelova; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tzamos_2025_CVPR,
  author    = {Tzamos, Charalambos and Kocur, Viktor and Ding, Yaqing and Barath, Daniel and Haladova, Zuzana Berger and Sattler, Torsten and Kukelova, Zuzana},
  title     = {Practical Solutions to the Relative Pose of Three Calibrated Cameras},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21913--21923},
}
Localized Concept Erasure for Text-to-Image Diffusion Models Using Training-Free Gated Low-Rank Adaptation: Byung Hyun Lee,

Sungjin Lim,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Byung Hyun and Lim, Sungjin and Chun, Se Young},
  title     = {Localized Concept Erasure for Text-to-Image Diffusion Models Using Training-Free Gated Low-Rank Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18596--18606},
}
PARC: A Quantitative Framework Uncovering the Symmetries within Vision Language Models: Jenny Schmalfuss,

Nadine Chang,

Vibashan VS,

Maying Shen,

Andres Bruhn,

Jose M. Alvarez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schmalfuss_2025_CVPR,
  author    = {Schmalfuss, Jenny and Chang, Nadine and VS, Vibashan and Shen, Maying and Bruhn, Andres and Alvarez, Jose M.},
  title     = {{PARC}: A Quantitative Framework Uncovering the Symmetries within Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25081--25091}
}
RoboTwin: Dual-Arm Robot Benchmark with Generative Digital Twins: Yao Mu,

Tianxing Chen,

Zanxin Chen,

Shijia Peng,

Zhiqian Lan,

Zeyu Gao,

Zhixuan Liang,

Qiaojun Yu,

Yude Zou,

Mingkun Xu,

Lunkai Lin,

Zhiqiang Xie,

Mingyu Ding,

Ping Luo; [pdf] [supp]
[bibtex]
@InProceedings{Mu_2025_CVPR,
  author    = {Mu, Yao and Chen, Tianxing and Chen, Zanxin and Peng, Shijia and Lan, Zhiqian and Gao, Zeyu and Liang, Zhixuan and Yu, Qiaojun and Zou, Yude and Xu, Mingkun and Lin, Lunkai and Xie, Zhiqiang and Ding, Mingyu and Luo, Ping},
  title     = {{RoboTwin}: Dual-Arm Robot Benchmark with Generative Digital Twins},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27649--27660}
}
Population Normalization for Federated Learning: Zhuoyao Wang,

Fan Yi,

Peizhu Gong,

Caitou He,

Cheng Jin,

Weizhong Zhang; [pdf] [supp]
[bibtex]
% NOTE(review): the key Wang_2025_CVPR is reused by other entries in this listing; assign a unique key locally before citing more than one of them.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhuoyao and Yi, Fan and Gong, Peizhu and He, Caitou and Jin, Cheng and Zhang, Weizhong},
  title     = {Population Normalization for Federated Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10214--10223}
}
AnimateAnything: Consistent and Controllable Animation for Video Generation: Guojun Lei,

Chi Wang,

Rong Zhang,

Yikai Wang,

Hong Li,

Weiwei Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2025_CVPR,
  author    = {Lei, Guojun and Wang, Chi and Zhang, Rong and Wang, Yikai and Li, Hong and Xu, Weiwei},
  title     = {{AnimateAnything}: Consistent and Controllable Animation for Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27946--27956}
}
PRaDA: Projective Radial Distortion Averaging: Daniil Sinitsyn,

Linus Härenstam-Nielsen,

Daniel Cremers; [pdf] [supp]
[bibtex]
% The umlaut is written as the brace group {\"a} so BibTeX treats it as a single letter when sorting and building labels.
@InProceedings{Sinitsyn_2025_CVPR,
  author    = {Sinitsyn, Daniil and H{\"a}renstam-Nielsen, Linus and Cremers, Daniel},
  title     = {{PRaDA}: Projective Radial Distortion Averaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21902--21912}
}
GenAssets: Generating in-the-wild 3D Assets in Latent Space: Ze Yang,

Jingkang Wang,

Haowei Zhang,

Sivabalan Manivasagam,

Yun Chen,

Raquel Urtasun; [pdf] [supp]
[bibtex]
% NOTE(review): the key Yang_2025_CVPR is reused by other entries in this listing; assign a unique key locally before citing more than one of them.
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Ze and Wang, Jingkang and Zhang, Haowei and Manivasagam, Sivabalan and Chen, Yun and Urtasun, Raquel},
  title     = {{GenAssets}: Generating in-the-wild {3D} Assets in Latent Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22392--22403}
}
RipVIS: Rip Currents Video Instance Segmentation Benchmark for Beach Monitoring and Safety: Andrei Dumitriu,

Florin Tatui,

Florin Miron,

Aakash Ralhan,

Radu Tudor Ionescu,

Radu Timofte; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dumitriu_2025_CVPR,
  author    = {Dumitriu, Andrei and Tatui, Florin and Miron, Florin and Ralhan, Aakash and Ionescu, Radu Tudor and Timofte, Radu},
  title     = {{RipVIS}: Rip Currents Video Instance Segmentation Benchmark for Beach Monitoring and Safety},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3427--3437}
}
Low-Rank Adaptation in Multilinear Operator Networks for Security-Preserving Incremental Learning: Huu Binh Ta,

Duc Nguyen,

Quyen Tran,

Toan Tran,

Tung Pham; [pdf] [supp]
[bibtex]
@InProceedings{Ta_2025_CVPR,
  author    = {Ta, Huu Binh and Nguyen, Duc and Tran, Quyen and Tran, Toan and Pham, Tung},
  title     = {Low-Rank Adaptation in Multilinear Operator Networks for Security-Preserving Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24341--24350}
}
Camera Resection from Known Line Pencils and a Radially Distorted Scanline: Juan C. Dibene,

Enrique Dunn; [pdf]
[bibtex]
@InProceedings{Dibene_2025_CVPR,
  author    = {Dibene, Juan C. and Dunn, Enrique},
  title     = {Camera Resection from Known Line Pencils and a Radially Distorted Scanline},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15843--15851}
}
ESCAPE: Equivariant Shape Completion via Anchor Point Encoding: Burak Bekci,

Nassir Navab,

Federico Tombari,

Mahdi Saleh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bekci_2025_CVPR,
  author    = {Bekci, Burak and Navab, Nassir and Tombari, Federico and Saleh, Mahdi},
  title     = {{ESCAPE}: Equivariant Shape Completion via Anchor Point Encoding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6480--6489}
}
SPC-GS: Gaussian Splatting with Semantic-Prompt Consistency for Indoor Open-World Free-view Synthesis from Sparse Inputs: Guibiao Liao,

Qing Li,

Zhenyu Bao,

Guoping Qiu,

Kanglin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2025_CVPR,
  author    = {Liao, Guibiao and Li, Qing and Bao, Zhenyu and Qiu, Guoping and Liu, Kanglin},
  title     = {{SPC-GS}: {Gaussian} Splatting with Semantic-Prompt Consistency for Indoor Open-World Free-view Synthesis from Sparse Inputs},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11264--11274}
}
M3amba: Memory Mamba is All You Need for Whole Slide Image Classification: Tingting Zheng,

Kui Jiang,

Yi Xiao,

Sicheng Zhao,

Hongxun Yao; [pdf]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Tingting and Jiang, Kui and Xiao, Yi and Zhao, Sicheng and Yao, Hongxun},
  title     = {{M3amba}: Memory {Mamba} is All You Need for Whole Slide Image Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15601--15610}
}
Satellite to GroundScape - Large-scale Consistent Ground View Generation from Satellite Views: Ningli Xu,

Rongjun Qin; [pdf] [supp]
[bibtex]
% NOTE(review): the key Xu_2025_CVPR is reused by other entries in this listing; assign a unique key locally before citing more than one of them.
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Ningli and Qin, Rongjun},
  title     = {Satellite to {GroundScape} - Large-scale Consistent Ground View Generation from Satellite Views},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6068--6077}
}
Variance-Based Membership Inference Attacks Against Large-Scale Image Captioning Models: Daniel Samira,

Edan Habler,

Yuval Elovici,

Asaf Shabtai; [pdf] [supp]
[bibtex]
@InProceedings{Samira_2025_CVPR,
  author    = {Samira, Daniel and Habler, Edan and Elovici, Yuval and Shabtai, Asaf},
  title     = {Variance-Based Membership Inference Attacks Against Large-Scale Image Captioning Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9210--9219}
}
Redefining <Creative> in Dictionary: Towards an Enhanced Semantic Understanding of Creative Generation: Fu Feng,

Yucheng Xie,

Xu Yang,

Jing Wang,

Xin Geng; [pdf] [supp] [arXiv]
[bibtex]
% "Creative" is braced on its own: a group that begins with a control sequence would be a BibTeX special character and would not keep the word's capital.
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Fu and Xie, Yucheng and Yang, Xu and Wang, Jing and Geng, Xin},
  title     = {Redefining \ensuremath{<}{Creative}\ensuremath{>} in Dictionary: Towards an Enhanced Semantic Understanding of Creative Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18444--18454}
}
FiRe: Fixed-points of Restoration Priors for Solving Inverse Problems: Matthieu Terris,

Ulugbek S. Kamilov,

Thomas Moreau; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Terris_2025_CVPR,
  author    = {Terris, Matthieu and Kamilov, Ulugbek S. and Moreau, Thomas},
  title     = {{FiRe}: Fixed-points of Restoration Priors for Solving Inverse Problems},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23185--23194}
}
Learning Dynamic Collaborative Network for Semi-supervised 3D Vessel Segmentation: Jiao Xu,

Xin Chen,

Lihe Zhang; [pdf]
[bibtex]
% NOTE(review): the key Xu_2025_CVPR is reused by other entries in this listing; assign a unique key locally before citing more than one of them.
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Jiao and Chen, Xin and Zhang, Lihe},
  title     = {Learning Dynamic Collaborative Network for Semi-supervised {3D} Vessel Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10445--10454}
}
Temporal Alignment-Free Video Matching for Few-shot Action Recognition: SuBeen Lee,

WonJun Moon,

Hyun Seok Seong,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Lee_2025_CVPR is reused by other entries in this listing; assign a unique key locally before citing more than one of them.
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, SuBeen and Moon, WonJun and Seong, Hyun Seok and Heo, Jae-Pil},
  title     = {Temporal Alignment-Free Video Matching for Few-shot Action Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5412--5421}
}
OSLoPrompt: Bridging Low-Supervision Challenges and Open-Set Domain Generalization in CLIP: Mohamad Hassan N C,

Divyam Gupta,

Mainak Singha,

Sai Bhargav Rongali,

Ankit Jha,

Muhammad Haris Khan,

Biplab Banerjee; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the first author "Mohamad Hassan N C" is stored with the single-letter surname "C" (hence the key C_2025_CVPR); confirm the intended family name before citing.
@InProceedings{C_2025_CVPR,
  author    = {C, Mohamad Hassan N and Gupta, Divyam and Singha, Mainak and Rongali, Sai Bhargav and Jha, Ankit and Khan, Muhammad Haris and Banerjee, Biplab},
  title     = {{OSLoPrompt}: Bridging Low-Supervision Challenges and Open-Set Domain Generalization in {CLIP}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10110--10120}
}
Dinomaly: The Less Is More Philosophy in Multi-Class Unsupervised Anomaly Detection: Jia Guo,

Shuai Lu,

Weihang Zhang,

Fang Chen,

Huiqi Li,

Hongen Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Jia and Lu, Shuai and Zhang, Weihang and Chen, Fang and Li, Huiqi and Liao, Hongen},
  title     = {Dinomaly: The Less Is More Philosophy in Multi-Class Unsupervised Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20405--20415}
}
VLog: Video-Language Models by Generative Retrieval of Narration Vocabulary: Kevin Qinghong Lin,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Lin_2025_CVPR is reused by other entries in this listing; assign a unique key locally before citing more than one of them.
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Kevin Qinghong and Shou, Mike Zheng},
  title     = {{VLog}: Video-Language Models by Generative Retrieval of Narration Vocabulary},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3218--3228}
}
CoMBO: Conflict Mitigation via Branched Optimization for Class Incremental Segmentation: Kai Fang,

Anqi Zhang,

Guangyu Gao,

Jianbo Jiao,

Chi Harold Liu,

Yunchao Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Kai and Zhang, Anqi and Gao, Guangyu and Jiao, Jianbo and Liu, Chi Harold and Wei, Yunchao},
  title     = {{CoMBO}: Conflict Mitigation via Branched Optimization for Class Incremental Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25667--25676}
}
Forensics-Bench: A Comprehensive Forgery Detection Benchmark Suite for Large Vision Language Models: Jin Wang,

Chenghui Lv,

Xian Li,

Shichao Dong,

Huadong Li,

Kelu Yao,

Chao Li,

Wenqi Shao,

Ping Luo; [pdf] [supp]
[bibtex]
% NOTE(review): the key Wang_2025_CVPR is reused by other entries in this listing; assign a unique key locally before citing more than one of them.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jin and Lv, Chenghui and Li, Xian and Dong, Shichao and Li, Huadong and Yao, Kelu and Li, Chao and Shao, Wenqi and Luo, Ping},
  title     = {{Forensics-Bench}: A Comprehensive Forgery Detection Benchmark Suite for Large Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4233--4245}
}
Detect-and-Guide: Self-regulation of Diffusion Models for Safe Text-to-Image Generation via Guideline Token Optimization: Feifei Li,

Mi Zhang,

Yiming Sun,

Min Yang; [pdf] [supp]
[bibtex]
% NOTE(review): the key Li_2025_CVPR is reused by other entries in this listing; assign a unique key locally before citing more than one of them.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Feifei and Zhang, Mi and Sun, Yiming and Yang, Min},
  title     = {{Detect-and-Guide}: Self-regulation of Diffusion Models for Safe Text-to-Image Generation via Guideline Token Optimization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13252--13262}
}
MirrorVerse: Pushing Diffusion Models to Realistically Reflect the World: Ankit Dhiman,

Manan Shah,

R Venkatesh Babu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dhiman_2025_CVPR,
  author    = {Dhiman, Ankit and Shah, Manan and Babu, R Venkatesh},
  title     = {{MirrorVerse}: Pushing Diffusion Models to Realistically Reflect the World},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11239--11249}
}
EAP-GS: Efficient Augmentation of Pointcloud for 3D Gaussian Splatting in Few-shot Scene Reconstruction: Dongrui Dai,

Yuxiang Xing; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2025_CVPR,
  author    = {Dai, Dongrui and Xing, Yuxiang},
  title     = {{EAP-GS}: Efficient Augmentation of Pointcloud for {3D} {Gaussian} Splatting in Few-shot Scene Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16498--16507}
}
Empowering Large Language Models with 3D Situation Awareness: Zhihao Yuan,

Yibo Peng,

Jinke Ren,

Yinghong Liao,

Yatong Han,

Chun-Mei Feng,

Hengshuang Zhao,

Guanbin Li,

Shuguang Cui,

Zhen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Zhihao and Peng, Yibo and Ren, Jinke and Liao, Yinghong and Han, Yatong and Feng, Chun-Mei and Zhao, Hengshuang and Li, Guanbin and Cui, Shuguang and Li, Zhen},
  title     = {Empowering Large Language Models with {3D} Situation Awareness},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19435--19445}
}
Forensic Self-Descriptions Are All You Need for Zero-Shot Detection, Open-Set Source Attribution, and Clustering of AI-generated Images: Tai D. Nguyen,

Aref Azizpour,

Matthew C. Stamm; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Tai D. and Azizpour, Aref and Stamm, Matthew C.},
  title     = {Forensic Self-Descriptions Are All You Need for Zero-Shot Detection, Open-Set Source Attribution, and Clustering of {AI}-generated Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3040--3050}
}
EchoTraffic: Enhancing Traffic Anomaly Understanding with Audio-Visual Insights: Zhenghao Xing,

Hao Chen,

Binzhu Xie,

Jiaqi Xu,

Ziyu Guo,

Xuemiao Xu,

Jianye Hao,

Chi-Wing Fu,

Xiaowei Hu,

Pheng-Ann Heng; [pdf] [supp]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Zhenghao and Chen, Hao and Xie, Binzhu and Xu, Jiaqi and Guo, Ziyu and Xu, Xuemiao and Hao, Jianye and Fu, Chi-Wing and Hu, Xiaowei and Heng, Pheng-Ann},
  title     = {{EchoTraffic}: Enhancing Traffic Anomaly Understanding with Audio-Visual Insights},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19098--19108}
}
FlexDrive: Toward Trajectory Flexibility in Driving Scene Gaussian Splatting Reconstruction and Rendering: Jingqiu Zhou,

Lue Fan,

Linjiang Huang,

Xiaoyu Shi,

Si Liu,

Zhaoxiang Zhang,

Hongsheng Li; [pdf]
[bibtex]
% NOTE(review): the key Zhou_2025_CVPR is reused by other entries in this listing; assign a unique key locally before citing more than one of them.
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Jingqiu and Fan, Lue and Huang, Linjiang and Shi, Xiaoyu and Liu, Si and Zhang, Zhaoxiang and Li, Hongsheng},
  title     = {{FlexDrive}: Toward Trajectory Flexibility in Driving Scene {Gaussian} Splatting Reconstruction and Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1549--1558}
}
Taming Video Diffusion Prior with Scene-Grounding Guidance for 3D Gaussian Splatting from Sparse Inputs: Yingji Zhong,

Zhihao Li,

Dave Zhenyu Chen,

Lanqing Hong,

Dan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_CVPR,
  author    = {Zhong, Yingji and Li, Zhihao and Chen, Dave Zhenyu and Hong, Lanqing and Xu, Dan},
  title     = {Taming Video Diffusion Prior with Scene-Grounding Guidance for {3D} {Gaussian} Splatting from Sparse Inputs},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6133--6143}
}
Interactive Medical Image Segmentation: A Benchmark Dataset and Baseline: Junlong Cheng,

Bin Fu,

Jin Ye,

Guoan Wang,

Tianbin Li,

Haoyu Wang,

Ruoyu Li,

He Yao,

Junren Cheng,

Jingwen Li,

Yanzhou Su,

Min Zhu,

Junjun He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Junlong and Fu, Bin and Ye, Jin and Wang, Guoan and Li, Tianbin and Wang, Haoyu and Li, Ruoyu and Yao, He and Cheng, Junren and Li, Jingwen and Su, Yanzhou and Zhu, Min and He, Junjun},
  title     = {Interactive Medical Image Segmentation: A Benchmark Dataset and Baseline},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20841--20851}
}
GigaHands: A Massive Annotated Dataset of Bimanual Hand Activities: Rao Fu,

Dingxi Zhang,

Alex Jiang,

Wanjia Fu,

Austin Funk,

Daniel Ritchie,

Srinath Sridhar; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Fu_2025_CVPR is reused by other entries in this listing; assign a unique key locally before citing more than one of them.
@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Rao and Zhang, Dingxi and Jiang, Alex and Fu, Wanjia and Funk, Austin and Ritchie, Daniel and Sridhar, Srinath},
  title     = {{GigaHands}: A Massive Annotated Dataset of Bimanual Hand Activities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17461--17474}
}
AutoSSVH: Exploring Automated Frame Sampling for Efficient Self-Supervised Video Hashing: Niu Lian,

Jun Li,

Jinpeng Wang,

Ruisheng Luo,

Yaowei Wang,

Shu-Tao Xia,

Bin Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Lian_2025_CVPR,
  author    = {Lian, Niu and Li, Jun and Wang, Jinpeng and Luo, Ruisheng and Wang, Yaowei and Xia, Shu-Tao and Chen, Bin},
  title     = {{AutoSSVH}: Exploring Automated Frame Sampling for Efficient Self-Supervised Video Hashing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18881--18890}
}
Augmenting Multimodal LLMs with Self-Reflective Tokens for Knowledge-based Visual Question Answering: Federico Cocchi,

Nicholas Moratelli,

Marcella Cornia,

Lorenzo Baraldi,

Rita Cucchiara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cocchi_2025_CVPR,
  author    = {Cocchi, Federico and Moratelli, Nicholas and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita},
  title     = {Augmenting Multimodal {LLMs} with Self-Reflective Tokens for Knowledge-based Visual Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9199--9209}
}
DifIISR: A Diffusion Model with Gradient Guidance for Infrared Image Super-Resolution: Xingyuan Li,

Zirui Wang,

Yang Zou,

Zhixin Chen,

Jun Ma,

Zhiying Jiang,

Long Ma,

Jinyuan Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Li_2025_CVPR is reused by other entries in this listing; assign a unique key locally before citing more than one of them.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xingyuan and Wang, Zirui and Zou, Yang and Chen, Zhixin and Ma, Jun and Jiang, Zhiying and Ma, Long and Liu, Jinyuan},
  title     = {{DifIISR}: A Diffusion Model with Gradient Guidance for Infrared Image Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7534--7544}
}
Recurrent Feature Mining and Keypoint Mixup Padding for Category-Agnostic Pose Estimation: Junjie Chen,

Weilong Chen,

Yifan Zuo,

Yuming Fang; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Junjie and Chen, Weilong and Zuo, Yifan and Fang, Yuming},
  title     = {Recurrent Feature Mining and Keypoint Mixup Padding for Category-Agnostic Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22035--22044}
}
FrugalNeRF: Fast Convergence for Extreme Few-shot Novel View Synthesis without Learned Priors: Chin-Yang Lin,

Chung-Ho Wu,

Chang-Han Yeh,

Shih-Han Yen,

Cheng Sun,

Yu-Lun Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Lin_2025_CVPR is reused by other entries in this listing; assign a unique key locally before citing more than one of them.
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Chin-Yang and Wu, Chung-Ho and Yeh, Chang-Han and Yen, Shih-Han and Sun, Cheng and Liu, Yu-Lun},
  title     = {{FrugalNeRF}: Fast Convergence for Extreme Few-shot Novel View Synthesis without Learned Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11227--11238}
}
3D-GSW: 3D Gaussian Splatting for Robust Watermarking: Youngdong Jang,

Hyunje Park,

Feng Yang,

Heeju Ko,

Euijin Choo,

Sangpil Kim; [pdf] [supp]
[bibtex]
@InProceedings{Jang_2025_CVPR,
  author    = {Jang, Youngdong and Park, Hyunje and Yang, Feng and Ko, Heeju and Choo, Euijin and Kim, Sangpil},
  title     = {{3D-GSW}: {3D} {Gaussian} Splatting for Robust Watermarking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5938--5948}
}
Pioneering 4-Bit FP Quantization for Diffusion Models: Mixup-Sign Quantization and Timestep-Aware Fine-Tuning: Maosen Zhao,

Pengtao Chen,

Chong Yu,

Yan Wen,

Xudong Tan,

Tao Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Maosen and Chen, Pengtao and Yu, Chong and Wen, Yan and Tan, Xudong and Chen, Tao},
  title     = {Pioneering 4-Bit {FP} Quantization for Diffusion Models: Mixup-Sign Quantization and Timestep-Aware Fine-Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18134--18143}
}
OpenING: A Comprehensive Benchmark for Judging Open-ended Interleaved Image-Text Generation: Pengfei Zhou,

Xiaopeng Peng,

Jiajun Song,

Chuanhao Li,

Zhaopan Xu,

Yue Yang,

Ziyao Guo,

Hao Zhang,

Yuqi Lin,

Yefei He,

Lirui Zhao,

Shuo Liu,

Tianhua Li,

Yuxuan Xie,

Xiaojun Chang,

Yu Qiao,

Wenqi Shao,

Kaipeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Zhou_2025_CVPR is reused by other entries in this listing; assign a unique key locally before citing more than one of them.
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Pengfei and Peng, Xiaopeng and Song, Jiajun and Li, Chuanhao and Xu, Zhaopan and Yang, Yue and Guo, Ziyao and Zhang, Hao and Lin, Yuqi and He, Yefei and Zhao, Lirui and Liu, Shuo and Li, Tianhua and Xie, Yuxuan and Chang, Xiaojun and Qiao, Yu and Shao, Wenqi and Zhang, Kaipeng},
  title     = {{OpenING}: A Comprehensive Benchmark for Judging Open-ended Interleaved Image-Text Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {56--66}
}
Dual Exposure Stereo for Extended Dynamic Range 3D Imaging: Juhyung Choi,

Jinnyeong Kim,

Seokjun Choi,

Jinwoo Lee,

Samuel Brucker,

Mario Bijelic,

Felix Heide,

Seung-Hwan Baek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2025_CVPR,
  author    = {Choi, Juhyung and Kim, Jinnyeong and Choi, Seokjun and Lee, Jinwoo and Brucker, Samuel and Bijelic, Mario and Heide, Felix and Baek, Seung-Hwan},
  title     = {Dual Exposure Stereo for Extended Dynamic Range {3D} Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6283--6293}
}
Unbiasing through Textual Descriptions: Mitigating Representation Bias in Video Benchmarks: Nina Shvetsova,

Arsha Nagrani,

Bernt Schiele,

Hilde Kuehne,

Christian Rupprecht; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shvetsova_2025_CVPR,
  author    = {Shvetsova, Nina and Nagrani, Arsha and Schiele, Bernt and Kuehne, Hilde and Rupprecht, Christian},
  title     = {Unbiasing through Textual Descriptions: Mitigating Representation Bias in Video Benchmarks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29050--29059}
}
Embodied Scene Understanding for Vision Language Models via MetaVQA: Weizhen Wang,

Chenda Duan,

Zhenghao Peng,

Yuxin Liu,

Bolei Zhou; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Wang_2025_CVPR is reused by other entries in this listing; assign a unique key locally before citing more than one of them.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Weizhen and Duan, Chenda and Peng, Zhenghao and Liu, Yuxin and Zhou, Bolei},
  title     = {Embodied Scene Understanding for Vision Language Models via {MetaVQA}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22453--22464}
}
CompGS: Unleashing 2D Compositionality for Compositional Text-to-3D via Dynamically Optimizing 3D Gaussians: Chongjian Ge,

Chenfeng Xu,

Yuanfeng Ji,

Chensheng Peng,

Masayoshi Tomizuka,

Ping Luo,

Mingyu Ding,

Varun Jampani,

Wei Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2025_CVPR,
  author    = {Ge, Chongjian and Xu, Chenfeng and Ji, Yuanfeng and Peng, Chensheng and Tomizuka, Masayoshi and Luo, Ping and Ding, Mingyu and Jampani, Varun and Zhan, Wei},
  title     = {{CompGS}: Unleashing {2D} Compositionality for Compositional Text-to-{3D} via Dynamically Optimizing {3D} {Gaussians}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18509--18520}
}
Learning Temporally Consistent Video Depth from Video Diffusion Priors: Jiahao Shao,

Yuanbo Yang,

Hongyu Zhou,

Youmin Zhang,

Yujun Shen,

Vitor Guizilini,

Yue Wang,

Matteo Poggi,

Yiyi Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2025_CVPR,
  author    = {Shao, Jiahao and Yang, Yuanbo and Zhou, Hongyu and Zhang, Youmin and Shen, Yujun and Guizilini, Vitor and Wang, Yue and Poggi, Matteo and Liao, Yiyi},
  title     = {Learning Temporally Consistent Video Depth from Video Diffusion Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22841--22852}
}
FIRE: Robust Detection of Diffusion-Generated Images via Frequency-Guided Reconstruction Error: Beilin Chu,

Xuan Xu,

Xin Wang,

Yufei Zhang,

Weike You,

Linna Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chu_2025_CVPR,
  author    = {Chu, Beilin and Xu, Xuan and Wang, Xin and Zhang, Yufei and You, Weike and Zhou, Linna},
  title     = {{FIRE}: Robust Detection of Diffusion-Generated Images via Frequency-Guided Reconstruction Error},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12830--12839}
}
Assessing and Learning Alignment of Unimodal Vision and Language Models: Le Zhang,

Qian Yang,

Aishwarya Agrawal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Le and Yang, Qian and Agrawal, Aishwarya},
  title     = {Assessing and Learning Alignment of Unimodal Vision and Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14604--14614}
}
Samba: A Unified Mamba-based Framework for General Salient Object Detection: Jiahao He,

Keren Fu,

Xiaohong Liu,

Qijun Zhao; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Jiahao and Fu, Keren and Liu, Xiaohong and Zhao, Qijun},
  title     = {Samba: A Unified {Mamba}-based Framework for General Salient Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25314--25324}
}
Action Detail Matters: Refining Video Recognition with Local Action Queries: Mengmeng Wang,

Zeyi Huang,

Xiangjie Kong,

Guojiang Shen,

Guang Dai,

Jingdong Wang,

Yong Liu; [pdf]
[bibtex]
% NOTE(review): the key Wang_2025_CVPR is reused by other entries in this listing; assign a unique key locally before citing more than one of them.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Mengmeng and Huang, Zeyi and Kong, Xiangjie and Shen, Guojiang and Dai, Guang and Wang, Jingdong and Liu, Yong},
  title     = {Action Detail Matters: Refining Video Recognition with Local Action Queries},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19132--19142}
}
PAVE: Patching and Adapting Video Large Language Models: Zhuoming Liu,

Yiquan Li,

Khoi Duc Nguyen,

Yiwu Zhong,

Yin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhuoming and Li, Yiquan and Nguyen, Khoi Duc and Zhong, Yiwu and Li, Yin},
  title     = {{PAVE}: Patching and Adapting Video Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3306--3317}
}
LesionLocator: Zero-Shot Universal Tumor Segmentation and Tracking in 3D Whole-Body Imaging: Maximilian Rokuss,

Yannick Kirchhoff,

Seval Akbal,

Balint Kovacs,

Saikat Roy,

Constantin Ulrich,

Tassilo Wald,

Lukas T. Rotkopf,

Heinz-Peter Schlemmer,

Klaus Maier-Hein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rokuss_2025_CVPR,
  author    = {Rokuss, Maximilian and Kirchhoff, Yannick and Akbal, Seval and Kovacs, Balint and Roy, Saikat and Ulrich, Constantin and Wald, Tassilo and Rotkopf, Lukas T. and Schlemmer, Heinz-Peter and Maier-Hein, Klaus},
  title     = {{LesionLocator}: Zero-Shot Universal Tumor Segmentation and Tracking in {3D} Whole-Body Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30872--30885}
}
Generative Map Priors for Collaborative BEV Semantic Segmentation: Jiahui Fu,

Yue Gong,

Luting Wang,

Shifeng Zhang,

Xu Zhou,

Si Liu; [pdf]
[bibtex]
% NOTE(review): the key Fu_2025_CVPR is reused by other entries in this listing; assign a unique key locally before citing more than one of them.
@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Jiahui and Gong, Yue and Wang, Luting and Zhang, Shifeng and Zhou, Xu and Liu, Si},
  title     = {Generative Map Priors for Collaborative {BEV} Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11919--11928}
}
Coherent 3D Portrait Video Reconstruction via Triplane Fusion: Shengze Wang,

Xueting Li,

Chao Liu,

Matthew Chan,

Michael Stengel,

Henry Fuchs,

Shalini De Mello,

Koki Nagano; [pdf] [supp]
[bibtex]
% NOTE(review): the key Wang_2025_CVPR is reused by other entries in this listing; assign a unique key locally before citing more than one of them.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shengze and Li, Xueting and Liu, Chao and Chan, Matthew and Stengel, Michael and Fuchs, Henry and De Mello, Shalini and Nagano, Koki},
  title     = {Coherent {3D} Portrait Video Reconstruction via Triplane Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10712--10722}
}
Generative Image Layer Decomposition with Visual Effects: Jinrui Yang,

Qing Liu,

Yijun Li,

Soo Ye Kim,

Daniil Pakhomov,

Mengwei Ren,

Jianming Zhang,

Zhe Lin,

Cihang Xie,

Yuyin Zhou; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Yang_2025_CVPR is reused by other entries in this listing; assign a unique key locally before citing more than one of them.
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jinrui and Liu, Qing and Li, Yijun and Kim, Soo Ye and Pakhomov, Daniil and Ren, Mengwei and Zhang, Jianming and Lin, Zhe and Xie, Cihang and Zhou, Yuyin},
  title     = {Generative Image Layer Decomposition with Visual Effects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7643--7653}
}
AR-Diffusion: Asynchronous Video Generation with Auto-Regressive Diffusion: Mingzhen Sun,

Weining Wang,

Gen Li,

Jiawei Liu,

Jiahui Sun,

Wanquan Feng,

Shanshan Lao,

Siyu Zhou,

Qian He,

Jing Liu; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Mingzhen and Wang, Weining and Li, Gen and Liu, Jiawei and Sun, Jiahui and Feng, Wanquan and Lao, Shanshan and Zhou, Siyu and He, Qian and Liu, Jing},
  title     = {{AR-Diffusion}: Asynchronous Video Generation with Auto-Regressive Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7364--7373}
}
ManiVideo: Generating Hand-Object Manipulation Video with Dexterous and Generalizable Grasping: Youxin Pang,

Ruizhi Shao,

Jiajun Zhang,

Hanzhang Tu,

Yun Liu,

Boyao Zhou,

Hongwen Zhang,

Yebin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pang_2025_CVPR,
  author    = {Pang, Youxin and Shao, Ruizhi and Zhang, Jiajun and Tu, Hanzhang and Liu, Yun and Zhou, Boyao and Zhang, Hongwen and Liu, Yebin},
  title     = {{ManiVideo}: Generating Hand-Object Manipulation Video with Dexterous and Generalizable Grasping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12209--12219}
}
DOF-GS: Adjustable Depth-of-Field 3D Gaussian Splatting for Post-Capture Refocusing, Defocus Rendering and Blur Removal: Yujie Wang,

Praneeth Chakravarthula,

Baoquan Chen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yujie and Chakravarthula, Praneeth and Chen, Baoquan},
  title     = {{DOF-GS}: Adjustable Depth-of-Field {3D} {Gaussian} Splatting for Post-Capture Refocusing, Defocus Rendering and Blur Removal},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21297--21306}
}
The Photographer's Eye: Teaching Multimodal Large Language Models to See, and Critique Like Photographers: Daiqing Qi,

Handong Zhao,

Jing Shi,

Simon Jenni,

Yifei Fan,

Franck Dernoncourt,

Scott Cohen,

Sheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2025_CVPR,
  author    = {Qi, Daiqing and Zhao, Handong and Shi, Jing and Jenni, Simon and Fan, Yifei and Dernoncourt, Franck and Cohen, Scott and Li, Sheng},
  title     = {The Photographer's Eye: Teaching Multimodal Large Language Models to See, and Critique Like Photographers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24807--24816}
}
Revisiting Audio-Visual Segmentation with Vision-Centric Transformer: Shaofei Huang,

Rui Ling,

Tianrui Hui,

Hongyu Li,

Xu Zhou,

Shifeng Zhang,

Si Liu,

Richang Hong,

Meng Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Shaofei and Ling, Rui and Hui, Tianrui and Li, Hongyu and Zhou, Xu and Zhang, Shifeng and Liu, Si and Hong, Richang and Wang, Meng},
  title     = {Revisiting Audio-Visual Segmentation with Vision-Centric Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8352--8361}
}
Synergizing Motion and Appearance: Multi-Scale Compensatory Codebooks for Talking Head Video Generation: Shuling Zhao,

Fa-Ting Hong,

Xiaoshui Huang,

Dan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Shuling and Hong, Fa-Ting and Huang, Xiaoshui and Xu, Dan},
  title     = {Synergizing Motion and Appearance: Multi-Scale Compensatory Codebooks for Talking Head Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26232--26241}
}
HOIGPT: Learning Long-Sequence Hand-Object Interaction with Language Models: Mingzhen Huang,

Fu-Jen Chu,

Bugra Tekin,

Kevin J. Liang,

Haoyu Ma,

Weiyao Wang,

Xingyu Chen,

Pierre Gleize,

Hongfei Xue,

Siwei Lyu,

Kris Kitani,

Matt Feiszli,

Hao Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Mingzhen and Chu, Fu-Jen and Tekin, Bugra and Liang, Kevin J. and Ma, Haoyu and Wang, Weiyao and Chen, Xingyu and Gleize, Pierre and Xue, Hongfei and Lyu, Siwei and Kitani, Kris and Feiszli, Matt and Tang, Hao},
  title     = {{HOIGPT}: Learning Long-Sequence Hand-Object Interaction with Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7136--7146}
}
GraphI2P: Image-to-Point Cloud Registration with Exploring Pattern of Correspondence via Graph Learning: Lin Bie,

Shouan Pan,

Siqi Li,

Yining Zhao,

Yue Gao; [pdf] [supp]
[bibtex]
@InProceedings{Bie_2025_CVPR,
  author    = {Bie, Lin and Pan, Shouan and Li, Siqi and Zhao, Yining and Gao, Yue},
  title     = {{GraphI2P}: Image-to-Point Cloud Registration with Exploring Pattern of Correspondence via Graph Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22161--22171}
}
SoftVQ-VAE: Efficient 1-Dimensional Continuous Tokenizer: Hao Chen,

Ze Wang,

Xiang Li,

Ximeng Sun,

Fangyi Chen,

Jiang Liu,

Jindong Wang,

Bhiksha Raj,

Zicheng Liu,

Emad Barsoum; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Hao and Wang, Ze and Li, Xiang and Sun, Ximeng and Chen, Fangyi and Liu, Jiang and Wang, Jindong and Raj, Bhiksha and Liu, Zicheng and Barsoum, Emad},
  title     = {{SoftVQ-VAE}: Efficient 1-Dimensional Continuous Tokenizer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28358--28370}
}
FedCS: Coreset Selection for Federated Learning: Chenhe Hao,

Weiying Xie,

Daixun Li,

Haonan Qin,

Hangyu Ye,

Leyuan Fang,

Yunsong Li; [pdf] [supp]
[bibtex]
@InProceedings{Hao_2025_CVPR,
  author    = {Hao, Chenhe and Xie, Weiying and Li, Daixun and Qin, Haonan and Ye, Hangyu and Fang, Leyuan and Li, Yunsong},
  title     = {{FedCS}: Coreset Selection for Federated Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15434--15443}
}
DPC: Dual-Prompt Collaboration for Tuning Vision-Language Models: Haoyang Li,

Liang Wang,

Chao Wang,

Jing Jiang,

Yan Peng,

Guodong Long; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Haoyang and Wang, Liang and Wang, Chao and Jiang, Jing and Peng, Yan and Long, Guodong},
  title     = {{DPC}: Dual-Prompt Collaboration for Tuning Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25623--25632}
}
Dual-Granularity Semantic Guided Sparse Routing Diffusion Model for General Pansharpening: Yinghui Xing,

Litao Qu,

Shizhou Zhang,

Di Xu,

Yingkun Yang,

Yanning Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Yinghui and Qu, Litao and Zhang, Shizhou and Xu, Di and Yang, Yingkun and Zhang, Yanning},
  title     = {Dual-Granularity Semantic Guided Sparse Routing Diffusion Model for General Pansharpening},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12658--12668}
}
AIM-Fair: Advancing Algorithmic Fairness via Selectively Fine-Tuning Biased Models with Contextual Synthetic Data: Zengqun Zhao,

Ziquan Liu,

Yu Cao,

Shaogang Gong,

Ioannis Patras; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Zengqun and Liu, Ziquan and Cao, Yu and Gong, Shaogang and Patras, Ioannis},
  title     = {{AIM-Fair}: Advancing Algorithmic Fairness via Selectively Fine-Tuning Biased Models with Contextual Synthetic Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28748--28758}
}
Robust Multi-Object 4D Generation for In-the-wild Videos: Wen-Hsuan Chu,

Lei Ke,

Jianmeng Liu,

Mingxiao Huo,

Pavel Tokmakov,

Katerina Fragkiadaki; [pdf] [supp]
[bibtex]
@InProceedings{Chu_2025_CVPR,
  author    = {Chu, Wen-Hsuan and Ke, Lei and Liu, Jianmeng and Huo, Mingxiao and Tokmakov, Pavel and Fragkiadaki, Katerina},
  title     = {Robust Multi-Object {4D} Generation for In-the-wild Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22067--22077}
}
OmniMMI: A Comprehensive Multi-modal Interaction Benchmark in Streaming Video Contexts: Yuxuan Wang,

Yueqian Wang,

Bo Chen,

Tong Wu,

Dongyan Zhao,

Zilong Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuxuan and Wang, Yueqian and Chen, Bo and Wu, Tong and Zhao, Dongyan and Zheng, Zilong},
  title     = {{OmniMMI}: A Comprehensive Multi-modal Interaction Benchmark in Streaming Video Contexts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18925--18935}
}
SOAP: Vision-Centric 3D Semantic Scene Completion with Scene-Adaptive Decoder and Occluded Region-Aware View Projection: Hyo-Jun Lee,

Yeong Jun Koh,

Hanul Kim,

Hyunseop Kim,

Yonguk Lee,

Jinu Lee; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Hyo-Jun and Koh, Yeong Jun and Kim, Hanul and Kim, Hyunseop and Lee, Yonguk and Lee, Jinu},
  title     = {{SOAP}: Vision-Centric {3D} Semantic Scene Completion with Scene-Adaptive Decoder and Occluded Region-Aware View Projection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17145--17154}
}
Taxonomy-Aware Evaluation of Vision-Language Models: Vésteinn Snæbjarnarson,

Kevin Du,

Niklas Stoehr,

Serge Belongie,

Ryan Cotterell,

Nico Lang,

Stella Frank; [pdf] [supp]
[bibtex]
@InProceedings{Snaebjarnarson_2025_CVPR,
  author    = {Sn{\ae}bjarnarson, V{\'e}steinn and Du, Kevin and Stoehr, Niklas and Belongie, Serge and Cotterell, Ryan and Lang, Nico and Frank, Stella},
  title     = {Taxonomy-Aware Evaluation of Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9109--9120}
}
Active Event-based Stereo Vision: Jianing Li,

Yunjian Zhang,

Haiqian Han,

Xiangyang Ji; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jianing and Zhang, Yunjian and Han, Haiqian and Ji, Xiangyang},
  title     = {Active Event-based Stereo Vision},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {971--981}
}
Mono-InternVL: Pushing the Boundaries of Monolithic Multimodal Large Language Models with Endogenous Visual Pre-training: Gen Luo,

Xue Yang,

Wenhan Dou,

Zhaokai Wang,

Jiawen Liu,

Jifeng Dai,

Yu Qiao,

Xizhou Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Gen and Yang, Xue and Dou, Wenhan and Wang, Zhaokai and Liu, Jiawen and Dai, Jifeng and Qiao, Yu and Zhu, Xizhou},
  title     = {{Mono-InternVL}: Pushing the Boundaries of Monolithic Multimodal Large Language Models with Endogenous Visual Pre-training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24960--24971}
}
SimVS: Simulating World Inconsistencies for Robust View Synthesis: Alex Trevithick,

Roni Paiss,

Philipp Henzler,

Dor Verbin,

Rundi Wu,

Hadi Alzayer,

Ruiqi Gao,

Ben Poole,

Jonathan T. Barron,

Aleksander Holynski,

Ravi Ramamoorthi,

Pratul P. Srinivasan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Trevithick_2025_CVPR,
  author    = {Trevithick, Alex and Paiss, Roni and Henzler, Philipp and Verbin, Dor and Wu, Rundi and Alzayer, Hadi and Gao, Ruiqi and Poole, Ben and Barron, Jonathan T. and Holynski, Aleksander and Ramamoorthi, Ravi and Srinivasan, Pratul P.},
  title     = {{SimVS}: Simulating World Inconsistencies for Robust View Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16464--16474}
}
FLAVC: Learned Video Compression with Feature Level Attention: Chun Zhang,

Heming Sun,

Jiro Katto; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Chun and Sun, Heming and Katto, Jiro},
  title     = {{FLAVC}: Learned Video Compression with Feature Level Attention},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28019--28028}
}
An End-to-End Robust Point Cloud Semantic Segmentation Network with Single-Step Conditional Diffusion Models: Wentao Qu,

Jing Wang,

YongShun Gong,

Xiaoshui Huang,

Liang Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_CVPR,
  author    = {Qu, Wentao and Wang, Jing and Gong, YongShun and Huang, Xiaoshui and Xiao, Liang},
  title     = {An End-to-End Robust Point Cloud Semantic Segmentation Network with Single-Step Conditional Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27325--27335}
}
From Zero to Detail: Deconstructing Ultra-High-Definition Image Restoration from Progressive Spectral Perspective: Chen Zhao,

Zhizhou Chen,

Yunzhe Xu,

Enxuan Gu,

Jian Li,

Zili Yi,

Qian Wang,

Jian Yang,

Ying Tai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Chen and Chen, Zhizhou and Xu, Yunzhe and Gu, Enxuan and Li, Jian and Yi, Zili and Wang, Qian and Yang, Jian and Tai, Ying},
  title     = {From Zero to Detail: Deconstructing Ultra-High-Definition Image Restoration from Progressive Spectral Perspective},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17935--17946}
}
SMILE: Infusing Spatial and Motion Semantics in Masked Video Learning: Fida Mohammad Thoker,

Letian Jiang,

Chen Zhao,

Bernard Ghanem; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thoker_2025_CVPR,
  author    = {Thoker, Fida Mohammad and Jiang, Letian and Zhao, Chen and Ghanem, Bernard},
  title     = {{SMILE}: Infusing Spatial and Motion Semantics in Masked Video Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8438--8449}
}
Video Language Model Pretraining with Spatio-temporal Masking: Yue Wu,

Zhaobo Qi,

Junshu Sun,

Yaowei Wang,

Qingming Huang,

Shuhui Wang; [pdf]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Yue and Qi, Zhaobo and Sun, Junshu and Wang, Yaowei and Huang, Qingming and Wang, Shuhui},
  title     = {Video Language Model Pretraining with Spatio-temporal Masking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8557--8567}
}
COSMOS: Cross-Modality Self-Distillation for Vision Language Pre-training: Sanghwan Kim,

Rui Xiao,

Mariana-Iuliana Georgescu,

Stephan Alaniz,

Zeynep Akata; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Sanghwan and Xiao, Rui and Georgescu, Mariana-Iuliana and Alaniz, Stephan and Akata, Zeynep},
  title     = {{COSMOS}: Cross-Modality Self-Distillation for Vision Language Pre-training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14690--14700}
}
Lifting Motion to the 3D World via 2D Diffusion: Jiaman Li,

C. Karen Liu,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jiaman and Liu, C. Karen and Wu, Jiajun},
  title     = {Lifting Motion to the {3D} World via {2D} Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17518--17528}
}
TAPT: Test-Time Adversarial Prompt Tuning for Robust Inference in Vision-Language Models: Xin Wang,

Kai Chen,

Jiaming Zhang,

Jingjing Chen,

Xingjun Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xin and Chen, Kai and Zhang, Jiaming and Chen, Jingjing and Ma, Xingjun},
  title     = {{TAPT}: Test-Time Adversarial Prompt Tuning for Robust Inference in Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19910--19920}
}
Active Data Curation Effectively Distills Large-Scale Multimodal Models: Vishaal Udandarao,

Nikhil Parthasarathy,

Muhammad Ferjad Naeem,

Talfan Evans,

Samuel Albanie,

Federico Tombari,

Yongqin Xian,

Alessio Tonioni,

Olivier J. Henaff; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Udandarao_2025_CVPR,
  author    = {Udandarao, Vishaal and Parthasarathy, Nikhil and Naeem, Muhammad Ferjad and Evans, Talfan and Albanie, Samuel and Tombari, Federico and Xian, Yongqin and Tonioni, Alessio and Henaff, Olivier J.},
  title     = {Active Data Curation Effectively Distills Large-Scale Multimodal Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14422--14437}
}
PCDreamer: Point Cloud Completion Through Multi-view Diffusion Priors: Guangshun Wei,

Yuan Feng,

Long Ma,

Chen Wang,

Yuanfeng Zhou,

Changjian Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Guangshun and Feng, Yuan and Ma, Long and Wang, Chen and Zhou, Yuanfeng and Li, Changjian},
  title     = {{PCDreamer}: Point Cloud Completion Through Multi-view Diffusion Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27243--27253}
}
Your ViT is Secretly an Image Segmentation Model: Tommie Kerssies,

Niccolò Cavagnero,

Alexander Hermans,

Narges Norouzi,

Giuseppe Averta,

Bastian Leibe,

Gijs Dubbelman,

Daan de Geus; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kerssies_2025_CVPR,
  author    = {Kerssies, Tommie and Cavagnero, Niccol{\`o} and Hermans, Alexander and Norouzi, Narges and Averta, Giuseppe and Leibe, Bastian and Dubbelman, Gijs and de Geus, Daan},
  title     = {Your {ViT} is Secretly an Image Segmentation Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25303--25313}
}
Cross-Rejective Open-Set SAR Image Registration: Shasha Mao,

Shiming Lu,

Zhaolong Du,

Licheng Jiao,

Shuiping Gou,

Luntian Mou,

Xuequan Lu,

Lin Xiong,

Yimeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2025_CVPR,
  author    = {Mao, Shasha and Lu, Shiming and Du, Zhaolong and Jiao, Licheng and Gou, Shuiping and Mou, Luntian and Lu, Xuequan and Xiong, Lin and Zhang, Yimeng},
  title     = {Cross-Rejective Open-Set {SAR} Image Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23027--23036}
}
Synthetic Data is an Elegant GIFT for Continual Vision-Language Models: Bin Wu,

Wuxuan Shi,

Jinqiao Wang,

Mang Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Bin and Shi, Wuxuan and Wang, Jinqiao and Ye, Mang},
  title     = {Synthetic Data is an Elegant {GIFT} for Continual Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2813--2823}
}
SplineGS: Robust Motion-Adaptive Spline for Real-Time Dynamic 3D Gaussians from Monocular Video: Jongmin Park,

Minh-Quan Viet Bui,

Juan Luis Gonzalez Bello,

Jaeho Moon,

Jihyong Oh,

Munchurl Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Jongmin and Bui, Minh-Quan Viet and Bello, Juan Luis Gonzalez and Moon, Jaeho and Oh, Jihyong and Kim, Munchurl},
  title     = {{SplineGS}: Robust Motion-Adaptive Spline for Real-Time Dynamic {3D} {Gaussians} from Monocular Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26866--26875}
}
SCSA: A Plug-and-Play Semantic Continuous-Sparse Attention for Arbitrary Semantic Style Transfer: Chunnan Shang,

Zhizhong Wang,

Hongwei Wang,

Xiangming Meng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shang_2025_CVPR,
  author    = {Shang, Chunnan and Wang, Zhizhong and Wang, Hongwei and Meng, Xiangming},
  title     = {{SCSA}: A Plug-and-Play Semantic Continuous-Sparse Attention for Arbitrary Semantic Style Transfer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13051--13060}
}
Timestep Embedding Tells: It's Time to Cache for Video Diffusion Model: Feng Liu,

Shiwei Zhang,

Xiaofeng Wang,

Yujie Wei,

Haonan Qiu,

Yuzhong Zhao,

Yingya Zhang,

Qixiang Ye,

Fang Wan; [pdf]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Feng and Zhang, Shiwei and Wang, Xiaofeng and Wei, Yujie and Qiu, Haonan and Zhao, Yuzhong and Zhang, Yingya and Ye, Qixiang and Wan, Fang},
  title     = {Timestep Embedding Tells: It's Time to Cache for Video Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7353--7363}
}
Can't Slow Me Down: Learning Robust and Hardware-Adaptive Object Detectors against Latency Attacks for Edge Devices: Tianyi Wang,

Zichen Wang,

Cong Wang,

Yuanchao Shu,

Ruilong Deng,

Peng Cheng,

Jiming Chen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Tianyi and Wang, Zichen and Wang, Cong and Shu, Yuanchao and Deng, Ruilong and Cheng, Peng and Chen, Jiming},
  title     = {Can't Slow Me Down: Learning Robust and Hardware-Adaptive Object Detectors against Latency Attacks for Edge Devices},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19230--19240}
}
Multi-modal Knowledge Distillation-based Human Trajectory Forecasting: Jaewoo Jeong,

Seohee Lee,

Daehee Park,

Giwon Lee,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_CVPR,
  author    = {Jeong, Jaewoo and Lee, Seohee and Park, Daehee and Lee, Giwon and Yoon, Kuk-Jin},
  title     = {Multi-modal Knowledge Distillation-based Human Trajectory Forecasting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24222--24233}
}
SAM2Object: Consolidating View Consistency via SAM2 for Zero-Shot 3D Instance Segmentation: Jihuai Zhao,

Junbao Zhuo,

Jiansheng Chen,

Huimin Ma; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Jihuai and Zhuo, Junbao and Chen, Jiansheng and Ma, Huimin},
  title     = {{SAM2Object}: Consolidating View Consistency via {SAM2} for Zero-Shot {3D} Instance Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19325--19334}
}
CDI: Copyrighted Data Identification in Diffusion Models: Jan Dubiński,

Antoni Kowalczuk,

Franziska Boenisch,

Adam Dziedzic; [pdf] [supp]
[bibtex]
@InProceedings{Dubinski_2025_CVPR,
  author    = {Dubi{\'n}ski, Jan and Kowalczuk, Antoni and Boenisch, Franziska and Dziedzic, Adam},
  title     = {{CDI}: Copyrighted Data Identification in Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18674--18684}
}
Hypergraph Vision Transformers: Images are More than Nodes, More than Edges: Joshua Fixelle; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fixelle_2025_CVPR,
  author    = {Fixelle, Joshua},
  title     = {Hypergraph Vision Transformers: Images are More than Nodes, More than Edges},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9751--9761}
}
Binarized Neural Network for Multi-spectral Image Fusion: Junming Hou,

Xiaoyu Chen,

Ran Ran,

Xiaofeng Cong,

Xinyang Liu,

Jian Wei You,

Liang-Jian Deng; [pdf] [supp]
[bibtex]
@InProceedings{Hou_2025_CVPR,
  author    = {Hou, Junming and Chen, Xiaoyu and Ran, Ran and Cong, Xiaofeng and Liu, Xinyang and You, Jian Wei and Deng, Liang-Jian},
  title     = {Binarized Neural Network for Multi-spectral Image Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2236--2245}
}
CRISP: Object Pose and Shape Estimation with Test-Time Adaptation: Jingnan Shi,

Rajat Talak,

Harry Zhang,

David Jin,

Luca Carlone; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Jingnan and Talak, Rajat and Zhang, Harry and Jin, David and Carlone, Luca},
  title     = {{CRISP}: Object Pose and Shape Estimation with Test-Time Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11644--11653}
}
ShiftwiseConv: Small Convolutional Kernel with Large Kernel Effect: Dachong Li,

Li Li,

Zhuangzhuang Chen,

Jianqiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Dachong and Li, Li and Chen, Zhuangzhuang and Li, Jianqiang},
  title     = {{ShiftwiseConv}: Small Convolutional Kernel with Large Kernel Effect},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25281--25291}
}
GaussianIP: Identity-Preserving Realistic 3D Human Generation via Human-Centric Diffusion Prior: Zichen Tang,

Yuan Yao,

Miaomiao Cui,

Liefeng Bo,

Hongyu Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Zichen and Yao, Yuan and Cui, Miaomiao and Bo, Liefeng and Yang, Hongyu},
  title     = {{GaussianIP}: Identity-Preserving Realistic {3D} Human Generation via Human-Centric Diffusion Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {348--358}
}
Creating Your Editable 3D Photorealistic Avatar with Tetrahedron-constrained Gaussian Splatting: Hanxi Liu,

Yifang Men,

Zhouhui Lian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Hanxi and Men, Yifang and Lian, Zhouhui},
  title     = {Creating Your Editable {3D} Photorealistic Avatar with Tetrahedron-constrained {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15976--15986}
}
FineVQ: Fine-Grained User Generated Content Video Quality Assessment: Huiyu Duan,

Qiang Hu,

Jiarui Wang,

Liu Yang,

Zitong Xu,

Lu Liu,

Xiongkuo Min,

Chunlei Cai,

Tianxiao Ye,

Xiaoyun Zhang,

Guangtao Zhai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Duan_2025_CVPR,
  author    = {Duan, Huiyu and Hu, Qiang and Wang, Jiarui and Yang, Liu and Xu, Zitong and Liu, Lu and Min, Xiongkuo and Cai, Chunlei and Ye, Tianxiao and Zhang, Xiaoyun and Zhai, Guangtao},
  title     = {{FineVQ}: Fine-Grained User Generated Content Video Quality Assessment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3206--3217}
}
Unveiling the Ignorance of MLLMs: Seeing Clearly, Answering Incorrectly: Yexin Liu,

Zhengyang Liang,

Yueze Wang,

Xianfeng Wu,

Feilong Tang,

Muyang He,

Jian Li,

Zheng Liu,

Harry Yang,

Sernam Lim,

Bo Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yexin and Liang, Zhengyang and Wang, Yueze and Wu, Xianfeng and Tang, Feilong and He, Muyang and Li, Jian and Liu, Zheng and Yang, Harry and Lim, Sernam and Zhao, Bo},
  title     = {Unveiling the Ignorance of {MLLMs}: Seeing Clearly, Answering Incorrectly},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9087--9097}
}
Object-Shot Enhanced Grounding Network for Egocentric Video: Yisen Feng,

Haoyu Zhang,

Meng Liu,

Weili Guan,

Liqiang Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Yisen and Zhang, Haoyu and Liu, Meng and Guan, Weili and Nie, Liqiang},
  title     = {Object-Shot Enhanced Grounding Network for Egocentric Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24190--24200}
}
MANTA: Diffusion Mamba for Efficient and Effective Stochastic Long-Term Dense Action Anticipation: Olga Zatsarynna,

Emad Bahrami,

Yazan Abu Farha,

Gianpiero Francesca,

Juergen Gall; [pdf] [supp]
[bibtex]
@InProceedings{Zatsarynna_2025_CVPR,
  author    = {Zatsarynna, Olga and Bahrami, Emad and Abu Farha, Yazan and Francesca, Gianpiero and Gall, Juergen},
  title     = {{MANTA}: Diffusion {Mamba} for Efficient and Effective Stochastic Long-Term Dense Action Anticipation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3438--3448}
}
METASCENES: Towards Automated Replica Creation for Real-world 3D Scans: Huangyue Yu,

Baoxiong Jia,

Yixin Chen,

Yandan Yang,

Puhao Li,

Rongpeng Su,

Jiaxin Li,

Qing Li,

Wei Liang,

Song-Chun Zhu,

Tengyu Liu,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Huangyue and Jia, Baoxiong and Chen, Yixin and Yang, Yandan and Li, Puhao and Su, Rongpeng and Li, Jiaxin and Li, Qing and Liang, Wei and Zhu, Song-Chun and Liu, Tengyu and Huang, Siyuan},
  title     = {{METASCENES}: Towards Automated Replica Creation for Real-world {3D} Scans},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1667--1679}
}
Robust Multimodal Survival Prediction with Conditional Latent Differentiation Variational AutoEncoder: Junjie Zhou,

Jiao Tang,

Yingli Zuo,

Peng Wan,

Daoqiang Zhang,

Wei Shao; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Junjie and Tang, Jiao and Zuo, Yingli and Wan, Peng and Zhang, Daoqiang and Shao, Wei},
  title     = {Robust Multimodal Survival Prediction with Conditional Latent Differentiation Variational {AutoEncoder}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10384--10393}
}
Sim-to-Real Causal Transfer: A Metric Learning Approach to Causally-Aware Interaction Representations: Ahmad Rahimi,

Po-Chien Luan,

Yuejiang Liu,

Frano Rajič,

Alexandre Alahi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rahimi_2025_CVPR,
  author    = {Rahimi, Ahmad and Luan, Po-Chien and Liu, Yuejiang and Raji{\v{c}}, Frano and Alahi, Alexandre},
  title     = {Sim-to-Real Causal Transfer: A Metric Learning Approach to Causally-Aware Interaction Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17271--17281}
}
Ev-3DOD: Pushing the Temporal Boundaries of 3D Object Detection with Event Cameras: Hoonhee Cho,

Jae-Young Kang,

Youngho Kim,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Cho_2025_CVPR,
  author    = {Cho, Hoonhee and Kang, Jae-Young and Kim, Youngho and Yoon, Kuk-Jin},
  title     = {{Ev-3DOD}: Pushing the Temporal Boundaries of {3D} Object Detection with Event Cameras},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27197--27210}
}
Zero-Shot Blind-spot Image Denoising via Implicit Neural Sampling: Yuhui Quan,

Tianxiang Zheng,

Zhiyuan Ma,

Hui Ji; [pdf] [supp]
[bibtex]
@InProceedings{Quan_2025_CVPR,
  author    = {Quan, Yuhui and Zheng, Tianxiang and Ma, Zhiyuan and Ji, Hui},
  title     = {Zero-Shot Blind-spot Image Denoising via Implicit Neural Sampling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7502--7512}
}
Nearly Zero-Cost Protection Against Mimicry by Personalized Diffusion Models: Namhyuk Ahn,

KiYoon Yoo,

Wonhyuk Ahn,

Daesik Kim,

Seung-Hun Nam; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ahn_2025_CVPR,
  author    = {Ahn, Namhyuk and Yoo, KiYoon and Ahn, Wonhyuk and Kim, Daesik and Nam, Seung-Hun},
  title     = {Nearly Zero-Cost Protection Against Mimicry by Personalized Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28801--28810},
}
Tripartite Weight-Space Ensemble for Few-Shot Class-Incremental Learning: Juntae Lee,

Munawar Hayat,

Sungrack Yun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Juntae and Hayat, Munawar and Yun, Sungrack},
  title     = {Tripartite Weight-Space Ensemble for Few-Shot Class-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15329--15338},
}
The Devil is in Temporal Token: High Quality Video Reasoning Segmentation: Sitong Gong,

Yunzhi Zhuge,

Lu Zhang,

Zongxin Yang,

Pingping Zhang,

Huchuan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2025_CVPR,
  author    = {Gong, Sitong and Zhuge, Yunzhi and Zhang, Lu and Yang, Zongxin and Zhang, Pingping and Lu, Huchuan},
  title     = {The Devil is in Temporal Token: High Quality Video Reasoning Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29183--29192},
}
PerLA: Perceptive 3D Language Assistant: Guofeng Mei,

Wei Lin,

Luigi Riz,

Yujiao Wu,

Fabio Poiesi,

Yiming Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mei_2025_CVPR,
  author    = {Mei, Guofeng and Lin, Wei and Riz, Luigi and Wu, Yujiao and Poiesi, Fabio and Wang, Yiming},
  title     = {{PerLA}: Perceptive {3D} Language Assistant},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14369--14379},
}
LITA-GS: Illumination-Agnostic Novel View Synthesis via Reference-Free 3D Gaussian Splatting and Physical Priors: Han Zhou,

Wei Dong,

Jun Chen; [pdf]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Han and Dong, Wei and Chen, Jun},
  title     = {{LITA-GS}: Illumination-Agnostic Novel View Synthesis via Reference-Free {3D} {Gaussian} Splatting and Physical Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21580--21589},
}
PhyT2V: LLM-Guided Iterative Self-Refinement for Physics-Grounded Text-to-Video Generation: Qiyao Xue,

Xiangyu Yin,

Boyuan Yang,

Wei Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2025_CVPR,
  author    = {Xue, Qiyao and Yin, Xiangyu and Yang, Boyuan and Gao, Wei},
  title     = {{PhyT2V}: {LLM}-Guided Iterative Self-Refinement for Physics-Grounded Text-to-Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18826--18836},
}
Track4Gen: Teaching Video Diffusion Models to Track Points Improves Video Generation: Hyeonho Jeong,

Chun-Hao P. Huang,

Jong Chul Ye,

Niloy J. Mitra,

Duygu Ceylan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_CVPR,
  author    = {Jeong, Hyeonho and Huang, Chun-Hao P. and Ye, Jong Chul and Mitra, Niloy J. and Ceylan, Duygu},
  title     = {{Track4Gen}: Teaching Video Diffusion Models to Track Points Improves Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7276--7287},
}
Mask^2DiT: Dual Mask-based Diffusion Transformer for Multi-Scene Long Video Generation: Tianhao Qi,

Jianlong Yuan,

Wanquan Feng,

Shancheng Fang,

Jiawei Liu,

SiYu Zhou,

Qian He,

Hongtao Xie,

Yongdong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2025_CVPR,
  author    = {Qi, Tianhao and Yuan, Jianlong and Feng, Wanquan and Fang, Shancheng and Liu, Jiawei and Zhou, SiYu and He, Qian and Xie, Hongtao and Zhang, Yongdong},
  title     = {{Mask$^2$DiT}: Dual Mask-based Diffusion Transformer for Multi-Scene Long Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18837--18846},
}
JamMa: Ultra-lightweight Local Feature Matching with Joint Mamba: Xiaoyong Lu,

Songlin Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Xiaoyong and Du, Songlin},
  title     = {{JamMa}: Ultra-lightweight Local Feature Matching with Joint {Mamba}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14934--14943},
}
DyCoke: Dynamic Compression of Tokens for Fast Video Large Language Models: Keda Tao,

Can Qin,

Haoxuan You,

Yang Sui,

Huan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2025_CVPR,
  author    = {Tao, Keda and Qin, Can and You, Haoxuan and Sui, Yang and Wang, Huan},
  title     = {{DyCoke}: Dynamic Compression of Tokens for Fast Video Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18992--19001},
}
T-FAKE: Synthesizing Thermal Images for Facial Landmarking: Philipp Flotho,

Moritz Piening,

Anna Kukleva,

Gabriele Steidl; [pdf] [supp]
[bibtex]
@InProceedings{Flotho_2025_CVPR,
  author    = {Flotho, Philipp and Piening, Moritz and Kukleva, Anna and Steidl, Gabriele},
  title     = {{T-FAKE}: Synthesizing Thermal Images for Facial Landmarking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26356--26366},
}
Multi-Resolution Pathology-Language Pre-training Model with Text-Guided Visual Representation: Shahad Albastaki,

Anabia Sohail,

Iyyakutti Iyappan Ganapathi,

Basit Alawode,

Asim Khan,

Sajid Javed,

Naoufel Werghi,

Mohammed Bennamoun,

Arif Mahmood; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Albastaki_2025_CVPR,
  author    = {Albastaki, Shahad and Sohail, Anabia and Ganapathi, Iyyakutti Iyappan and Alawode, Basit and Khan, Asim and Javed, Sajid and Werghi, Naoufel and Bennamoun, Mohammed and Mahmood, Arif},
  title     = {Multi-Resolution Pathology-Language Pre-training Model with Text-Guided Visual Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25907--25919},
}
InteractAnything: Zero-shot Human Object Interaction Synthesis via LLM Feedback and Object Affordance Parsing: Jinlu Zhang,

Yixin Chen,

Zan Wang,

Jie Yang,

Yizhou Wang,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jinlu and Chen, Yixin and Wang, Zan and Yang, Jie and Wang, Yizhou and Huang, Siyuan},
  title     = {{InteractAnything}: Zero-shot Human Object Interaction Synthesis via {LLM} Feedback and Object Affordance Parsing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7015--7025},
}
MammAlps: A Multi-view Video Behavior Monitoring Dataset of Wild Mammals in the Swiss Alps: Valentin Gabeff,

Haozhe Qi,

Brendan Flaherty,

Gencer Sumbul,

Alexander Mathis,

Devis Tuia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gabeff_2025_CVPR,
  author    = {Gabeff, Valentin and Qi, Haozhe and Flaherty, Brendan and Sumbul, Gencer and Mathis, Alexander and Tuia, Devis},
  title     = {{MammAlps}: A Multi-view Video Behavior Monitoring Dataset of Wild Mammals in the {Swiss} {Alps}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13854--13864},
}
Diffusion-based Realistic Listening Head Generation via Hybrid Motion Modeling: Yinuo Wang,

Yanbo Fan,

Xuan Wang,

Guo Yu,

Fei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yinuo and Fan, Yanbo and Wang, Xuan and Yu, Guo and Wang, Fei},
  title     = {Diffusion-based Realistic Listening Head Generation via Hybrid Motion Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15885--15895},
}
SAT-HMR: Real-Time Multi-Person 3D Mesh Estimation via Scale-Adaptive Tokens: Chi Su,

Xiaoxuan Ma,

Jiajun Su,

Yizhou Wang; [pdf] [supp]
[bibtex]
@InProceedings{Su_2025_CVPR,
  author    = {Su, Chi and Ma, Xiaoxuan and Su, Jiajun and Wang, Yizhou},
  title     = {{SAT-HMR}: Real-Time Multi-Person {3D} Mesh Estimation via Scale-Adaptive Tokens},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16796--16806},
}
PICD: Versatile Perceptual Image Compression with Diffusion Rendering: Tongda Xu,

Jiahao Li,

Bin Li,

Yan Wang,

Ya-Qin Zhang,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Tongda and Li, Jiahao and Li, Bin and Wang, Yan and Zhang, Ya-Qin and Lu, Yan},
  title     = {{PICD}: Versatile Perceptual Image Compression with Diffusion Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28436--28445},
}
UniScene: Unified Occupancy-centric Driving Scene Generation: Bohan Li,

Jiazhe Guo,

Hongsi Liu,

Yingshuang Zou,

Yikang Ding,

Xiwu Chen,

Hu Zhu,

Feiyang Tan,

Chi Zhang,

Tiancai Wang,

Shuchang Zhou,

Li Zhang,

Xiaojuan Qi,

Hao Zhao,

Mu Yang,

Wenjun Zeng,

Xin Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Bohan and Guo, Jiazhe and Liu, Hongsi and Zou, Yingshuang and Ding, Yikang and Chen, Xiwu and Zhu, Hu and Tan, Feiyang and Zhang, Chi and Wang, Tiancai and Zhou, Shuchang and Zhang, Li and Qi, Xiaojuan and Zhao, Hao and Yang, Mu and Zeng, Wenjun and Jin, Xin},
  title     = {{UniScene}: Unified Occupancy-centric Driving Scene Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11971--11981},
}
Wonderland: Navigating 3D Scenes from a Single Image: Hanwen Liang,

Junli Cao,

Vidit Goel,

Guocheng Qian,

Sergei Korolev,

Demetri Terzopoulos,

Konstantinos N. Plataniotis,

Sergey Tulyakov,

Jian Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Hanwen and Cao, Junli and Goel, Vidit and Qian, Guocheng and Korolev, Sergei and Terzopoulos, Demetri and Plataniotis, Konstantinos N. and Tulyakov, Sergey and Ren, Jian},
  title     = {Wonderland: Navigating {3D} Scenes from a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {798--810},
}
Learning from Streaming Video with Orthogonal Gradients: Tengda Han,

Dilara Gokay,

Joseph Heyward,

Chuhan Zhang,

Daniel Zoran,

Viorica Patraucean,

Joao Carreira,

Dima Damen,

Andrew Zisserman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Tengda and Gokay, Dilara and Heyward, Joseph and Zhang, Chuhan and Zoran, Daniel and Patraucean, Viorica and Carreira, Joao and Damen, Dima and Zisserman, Andrew},
  title     = {Learning from Streaming Video with Orthogonal Gradients},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13651--13660},
}
Towards Satellite Image Road Graph Extraction: A Global-Scale Dataset and A Novel Method: Pan Yin,

Kaiyu Li,

Xiangyong Cao,

Jing Yao,

Lei Liu,

Xueru Bai,

Feng Zhou,

Deyu Meng; [pdf] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Pan and Li, Kaiyu and Cao, Xiangyong and Yao, Jing and Liu, Lei and Bai, Xueru and Zhou, Feng and Meng, Deyu},
  title     = {Towards Satellite Image Road Graph Extraction: A Global-Scale Dataset and A Novel Method},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1527--1537},
}
SuperLightNet: Lightweight Parameter Aggregation Network for Multimodal Brain Tumor Segmentation: Feng Yu,

Jiacheng Cao,

Li Liu,

Minghua Jiang; [pdf]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Feng and Cao, Jiacheng and Liu, Li and Jiang, Minghua},
  title     = {{SuperLightNet}: Lightweight Parameter Aggregation Network for Multimodal Brain Tumor Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5197--5206},
}
VideoSPatS: Video SPatiotemporal Splines for Disentangled Occlusion, Appearance and Motion Modeling and Editing: Juan Luis Gonzalez,

Xu Yao,

Alex Whelan,

Kyle Olszewski,

Hyeongwoo Kim,

Pablo Garrido; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gonzalez_2025_CVPR,
  author    = {Gonzalez, Juan Luis and Yao, Xu and Whelan, Alex and Olszewski, Kyle and Kim, Hyeongwoo and Garrido, Pablo},
  title     = {{VideoSPatS}: Video {SPatiotemporal} Splines for Disentangled Occlusion, Appearance and Motion Modeling and Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22901--22910},
}
Classifier-to-Bias: Toward Unsupervised Automatic Bias Detection for Visual Classifiers: Quentin Guimard,

Moreno D'Incà,

Massimiliano Mancini,

Elisa Ricci; [pdf] [supp]
[bibtex]
@InProceedings{Guimard_2025_CVPR,
  author    = {Guimard, Quentin and D'Inc{\`a}, Moreno and Mancini, Massimiliano and Ricci, Elisa},
  title     = {Classifier-to-Bias: Toward Unsupervised Automatic Bias Detection for Visual Classifiers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15151--15161},
}
Self-Supervised Spatial Correspondence Across Modalities: Ayush Shrivastava,

Andrew Owens; [pdf] [arXiv]
[bibtex]
@InProceedings{Shrivastava_2025_CVPR,
  author    = {Shrivastava, Ayush and Owens, Andrew},
  title     = {Self-Supervised Spatial Correspondence Across Modalities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6383--6393},
}
MOS-Attack: A Scalable Multi-objective Adversarial Attack Framework: Ping Guo,

Cheng Gong,

Xi Lin,

Fei Liu,

Zhichao Lu,

Qingfu Zhang,

Zhenkun Wang; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Ping and Gong, Cheng and Lin, Xi and Liu, Fei and Lu, Zhichao and Zhang, Qingfu and Wang, Zhenkun},
  title     = {{MOS-Attack}: A Scalable Multi-objective Adversarial Attack Framework},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5041--5051},
}
Six-CD: Benchmarking Concept Removals for Text-to-image Diffusion Models: Jie Ren,

Kangrui Chen,

Yingqian Cui,

Shenglai Zeng,

Hui Liu,

Yue Xing,

Jiliang Tang,

Lingjuan Lyu; [pdf] [supp]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Jie and Chen, Kangrui and Cui, Yingqian and Zeng, Shenglai and Liu, Hui and Xing, Yue and Tang, Jiliang and Lyu, Lingjuan},
  title     = {{Six-CD}: Benchmarking Concept Removals for Text-to-image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28769--28778},
}
Motion Modes: What Could Happen Next?: Karran Pandey,

Yannick Hold-Geoffroy,

Matheus Gadelha,

Niloy J. Mitra,

Karan Singh,

Paul Guerrero; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pandey_2025_CVPR,
  author    = {Pandey, Karran and Hold-Geoffroy, Yannick and Gadelha, Matheus and Mitra, Niloy J. and Singh, Karan and Guerrero, Paul},
  title     = {Motion Modes: What Could Happen Next?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2030--2039},
}
Finer-CAM: Spotting the Difference Reveals Finer Details for Visual Explanation: Ziheng Zhang,

Jianyang Gu,

Arpita Chowdhury,

Zheda Mai,

David Carlyn,

Tanya Berger-Wolf,

Yu Su,

Wei-Lun Chao; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Ziheng and Gu, Jianyang and Chowdhury, Arpita and Mai, Zheda and Carlyn, David and Berger-Wolf, Tanya and Su, Yu and Chao, Wei-Lun},
  title     = {{Finer-CAM}: Spotting the Difference Reveals Finer Details for Visual Explanation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9611--9620},
}
The Change You Want To Detect: Semantic Change Detection In Earth Observation With Hybrid Data Generation: Yanis Benidir,

Nicolas Gonthier,

Clement Mallet; [pdf] [supp]
[bibtex]
@InProceedings{Benidir_2025_CVPR,
  author    = {Benidir, Yanis and Gonthier, Nicolas and Mallet, Clement},
  title     = {The Change You Want To Detect: Semantic Change Detection In {Earth} Observation With Hybrid Data Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2204--2214},
}
Black-Box Forgery Attacks on Semantic Watermarks for Diffusion Models: Andreas Müller,

Denis Lukovnikov,

Jonas Thietke,

Asja Fischer,

Erwin Quiring; [pdf] [supp]
[bibtex]
@InProceedings{Muller_2025_CVPR,
  author    = {M{\"u}ller, Andreas and Lukovnikov, Denis and Thietke, Jonas and Fischer, Asja and Quiring, Erwin},
  title     = {Black-Box Forgery Attacks on Semantic Watermarks for Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20937--20946},
}
An Image-like Diffusion Method for Human-Object Interaction Detection: Xiaofei Hui,

Haoxuan Qu,

Hossein Rahmani,

Jun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hui_2025_CVPR,
  author    = {Hui, Xiaofei and Qu, Haoxuan and Rahmani, Hossein and Liu, Jun},
  title     = {An Image-like Diffusion Method for Human-Object Interaction Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14002--14012},
}
VidSeg: Training-free Video Semantic Segmentation based on Diffusion Models: Qian Wang,

Abdelrahman Eldesokey,

Mohit Mendiratta,

Fangneng Zhan,

Adam Kortylewski,

Christian Theobalt,

Peter Wonka; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Qian and Eldesokey, Abdelrahman and Mendiratta, Mohit and Zhan, Fangneng and Kortylewski, Adam and Theobalt, Christian and Wonka, Peter},
  title     = {{VidSeg}: Training-free Video Semantic Segmentation based on Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22985--22994},
}
COB-GS: Clear Object Boundaries in 3DGS Segmentation Based on Boundary-Adaptive Gaussian Splitting: Jiaxin Zhang,

Junjun Jiang,

Youyu Chen,

Kui Jiang,

Xianming Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jiaxin and Jiang, Junjun and Chen, Youyu and Jiang, Kui and Liu, Xianming},
  title     = {{COB-GS}: Clear Object Boundaries in {3DGS} Segmentation Based on Boundary-Adaptive {Gaussian} Splitting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19335--19344},
}
Weakly Supervised Semantic Segmentation via Progressive Confidence Region Expansion: Xiangfeng Xu,

Pinyi Zhang,

Wenxuan Huang,

Yunhang Shen,

Haosheng Chen,

Jingzhong Lin,

Wei Li,

Gaoqi He,

Jiao Xie,

Shaohui Lin; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Xiangfeng and Zhang, Pinyi and Huang, Wenxuan and Shen, Yunhang and Chen, Haosheng and Lin, Jingzhong and Li, Wei and He, Gaoqi and Xie, Jiao and Lin, Shaohui},
  title     = {Weakly Supervised Semantic Segmentation via Progressive Confidence Region Expansion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9829--9838},
}
RELOCATE: A Simple Training-Free Baseline for Visual Query Localization Using Region-Based Representations: Savya Khosla,

Sethuraman T V,

Alexander Schwing,

Derek Hoiem; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Khosla_2025_CVPR,
  author    = {Khosla, Savya and V, Sethuraman T and Schwing, Alexander and Hoiem, Derek},
  title     = {{RELOCATE}: A Simple Training-Free Baseline for Visual Query Localization Using Region-Based Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3697--3706},
}
PEER Pressure: Model-to-Model Regularization for Single Source Domain Generalization: Dong Kyu Cho,

Inwoo Hwang,

Sanghack Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cho_2025_CVPR,
  author    = {Cho, Dong Kyu and Hwang, Inwoo and Lee, Sanghack},
  title     = {{PEER} Pressure: Model-to-Model Regularization for Single Source Domain Generalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15360--15370},
}
HOTFormerLoc: Hierarchical Octree Transformer for Versatile Lidar Place Recognition Across Ground and Aerial Views: Ethan Griffiths,

Maryam Haghighat,

Simon Denman,

Clinton Fookes,

Milad Ramezani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Griffiths_2025_CVPR,
  author    = {Griffiths, Ethan and Haghighat, Maryam and Denman, Simon and Fookes, Clinton and Ramezani, Milad},
  title     = {{HOTFormerLoc}: Hierarchical Octree Transformer for Versatile Lidar Place Recognition Across Ground and Aerial Views},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6648--6658},
}
Revisiting Fairness in Multitask Learning: A Performance-Driven Approach for Variance Reduction: Xiaohan Qin,

Xiaoxing Wang,

Junchi Yan; [pdf] [supp]
[bibtex]
@InProceedings{Qin_2025_CVPR,
  author    = {Qin, Xiaohan and Wang, Xiaoxing and Yan, Junchi},
  title     = {Revisiting Fairness in Multitask Learning: A Performance-Driven Approach for Variance Reduction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20492--20501},
}
UniK3D: Universal Camera Monocular 3D Estimation: Luigi Piccinelli,

Christos Sakaridis,

Mattia Segu,

Yung-Hsu Yang,

Siyuan Li,

Wim Abbeloos,

Luc Van Gool; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Piccinelli_2025_CVPR,
  author    = {Piccinelli, Luigi and Sakaridis, Christos and Segu, Mattia and Yang, Yung-Hsu and Li, Siyuan and Abbeloos, Wim and Van Gool, Luc},
  title     = {{UniK3D}: Universal Camera Monocular {3D} Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1028--1039},
}
ConMo: Controllable Motion Disentanglement and Recomposition for Zero-Shot Motion Transfer: Jiayi Gao,

Zijin Yin,

Changcheng Hua,

Yuxin Peng,

Kongming Liang,

Zhanyu Ma,

Jun Guo,

Yang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Jiayi and Yin, Zijin and Hua, Changcheng and Peng, Yuxin and Liang, Kongming and Ma, Zhanyu and Guo, Jun and Liu, Yang},
  title     = {{ConMo}: Controllable Motion Disentanglement and Recomposition for Zero-Shot Motion Transfer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7191--7200},
}
VideoMage: Multi-Subject and Motion Customization of Text-to-Video Diffusion Models: Chi-Pin Huang,

Yen-Siang Wu,

Hung-Kai Chung,

Kai-Po Chang,

Fu-En Yang,

Yu-Chiang Frank Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Chi-Pin and Wu, Yen-Siang and Chung, Hung-Kai and Chang, Kai-Po and Yang, Fu-En and Wang, Yu-Chiang Frank},
  title     = {{VideoMage}: Multi-Subject and Motion Customization of Text-to-Video Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17603--17612},
}
AG-VPReID: A Challenging Large-Scale Benchmark for Aerial-Ground Video-based Person Re-Identification: Huy Nguyen,

Kien Nguyen,

Akila Pemasiri,

Feng Liu,

Sridha Sridharan,

Clinton Fookes; [pdf] [supp]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Huy and Nguyen, Kien and Pemasiri, Akila and Liu, Feng and Sridharan, Sridha and Fookes, Clinton},
  title     = {{AG-VPReID}: A Challenging Large-Scale Benchmark for Aerial-Ground Video-based Person Re-Identification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1241--1251},
}
EBS-EKF: Accurate and High Frequency Event-based Star Tracking: Albert W. Reed,

Connor Hashemi,

Dennis Melamed,

Nitesh Menon,

Keigo Hirakawa,

Scott McCloskey; [pdf] [supp]
[bibtex]
@InProceedings{Reed_2025_CVPR,
  author    = {Reed, Albert W. and Hashemi, Connor and Melamed, Dennis and Menon, Nitesh and Hirakawa, Keigo and McCloskey, Scott},
  title     = {{EBS-EKF}: Accurate and High Frequency Event-based Star Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6510--6519},
}
PersonaBooth: Personalized Text-to-Motion Generation: Boeun Kim,

Hea In Jeong,

JungHoon Sung,

Yihua Cheng,

Jeongmin Lee,

Ju Yong Chang,

Sang-Il Choi,

Younggeun Choi,

Saim Shin,

Jungho Kim,

Hyung Jin Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Boeun and Jeong, Hea In and Sung, JungHoon and Cheng, Yihua and Lee, Jeongmin and Chang, Ju Yong and Choi, Sang-Il and Choi, Younggeun and Shin, Saim and Kim, Jungho and Chang, Hyung Jin},
  title     = {{PersonaBooth}: Personalized Text-to-Motion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22756--22765},
}
Benchmarking Object Detectors under Real-World Distribution Shifts in Satellite Imagery: Sara A. Al-Emadi,

Yin Yang,

Ferda Ofli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Al-Emadi_2025_CVPR,
  author    = {Al-Emadi, Sara A. and Yang, Yin and Ofli, Ferda},
  title     = {Benchmarking Object Detectors under Real-World Distribution Shifts in Satellite Imagery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8299--8309},
}
SAIST: Segment Any Infrared Small Target Model Guided by Contrastive Language-Image Pretraining: Mingjin Zhang,

Xiaolong Li,

Fei Gao,

Jie Guo,

Xinbo Gao,

Jing Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Mingjin and Li, Xiaolong and Gao, Fei and Guo, Jie and Gao, Xinbo and Zhang, Jing},
  title     = {{SAIST}: Segment Any Infrared Small Target Model Guided by Contrastive Language-Image Pretraining},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9549--9558},
}
Star with Bilinear Mapping: Zelin Peng,

Yu Huang,

Zhengqin Xu,

Feilong Tang,

Ming Hu,

Xiaokang Yang,

Wei Shen; [pdf]
[bibtex]
@InProceedings{Peng_2025_CVPR,
  author    = {Peng, Zelin and Huang, Yu and Xu, Zhengqin and Tang, Feilong and Hu, Ming and Yang, Xiaokang and Shen, Wei},
  title     = {Star with Bilinear Mapping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25292--25302},
}
Closed-Loop Supervised Fine-Tuning of Tokenized Traffic Models: Zhejun Zhang,

Peter Karkus,

Maximilian Igl,

Wenhao Ding,

Yuxiao Chen,

Boris Ivanovic,

Marco Pavone; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhejun and Karkus, Peter and Igl, Maximilian and Ding, Wenhao and Chen, Yuxiao and Ivanovic, Boris and Pavone, Marco},
  title     = {Closed-Loop Supervised Fine-Tuning of Tokenized Traffic Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5422--5432},
}
DIFIX3D+: Improving 3D Reconstructions with Single-Step Diffusion Models: Jay Zhangjie Wu,

Yuxuan Zhang,

Haithem Turki,

Xuanchi Ren,

Jun Gao,

Mike Zheng Shou,

Sanja Fidler,

Zan Gojcic,

Huan Ling; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Jay Zhangjie and Zhang, Yuxuan and Turki, Haithem and Ren, Xuanchi and Gao, Jun and Shou, Mike Zheng and Fidler, Sanja and Gojcic, Zan and Ling, Huan},
  title     = {{DIFIX3D+}: Improving {3D} Reconstructions with Single-Step Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26024--26035},
}
Time of the Flight of the Gaussians: Optimizing Depth Indirectly in Dynamic Radiance Fields: Runfeng Li,

Mikhail Okunev,

Zixuan Guo,

Anh Ha Duong,

Christian Richardt,

Matthew O'Toole,

James Tompkin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Runfeng and Okunev, Mikhail and Guo, Zixuan and Duong, Anh Ha and Richardt, Christian and O'Toole, Matthew and Tompkin, James},
  title     = {Time of the Flight of the {Gaussians}: Optimizing Depth Indirectly in Dynamic Radiance Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21021--21030},
}
Align3R: Aligned Monocular Depth Estimation for Dynamic Videos: Jiahao Lu,

Tianyu Huang,

Peng Li,

Zhiyang Dou,

Cheng Lin,

Zhiming Cui,

Zhen Dong,

Sai-Kit Yeung,

Wenping Wang,

Yuan Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Jiahao and Huang, Tianyu and Li, Peng and Dou, Zhiyang and Lin, Cheng and Cui, Zhiming and Dong, Zhen and Yeung, Sai-Kit and Wang, Wenping and Liu, Yuan},
  title     = {{Align3R}: Aligned Monocular Depth Estimation for Dynamic Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22820--22830},
}
Compositional Caching for Training-free Open-vocabulary Attribute Detection: Marco Garosi,

Alessandro Conti,

Gaowen Liu,

Elisa Ricci,

Massimiliano Mancini; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Garosi_2025_CVPR,
  author    = {Garosi, Marco and Conti, Alessandro and Liu, Gaowen and Ricci, Elisa and Mancini, Massimiliano},
  title     = {Compositional Caching for Training-free Open-vocabulary Attribute Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15098--15107},
}
Seek Common Ground While Reserving Differences: Semi-Supervised Image-Text Sentiment Recognition: Wuyou Xia,

Guoli Jia,

Sicheng Zhao,

Jufeng Yang; [pdf]
[bibtex]
@InProceedings{Xia_2025_CVPR,
  author    = {Xia, Wuyou and Jia, Guoli and Zhao, Sicheng and Yang, Jufeng},
  title     = {Seek Common Ground While Reserving Differences: Semi-Supervised Image-Text Sentiment Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29601--29611},
}
CoLLM: A Large Language Model for Composed Image Retrieval: Chuong Huynh,

Jinyu Yang,

Ashish Tawari,

Mubarak Shah,

Son Tran,

Raffay Hamid,

Trishul Chilimbi,

Abhinav Shrivastava; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huynh_2025_CVPR,
  author    = {Huynh, Chuong and Yang, Jinyu and Tawari, Ashish and Shah, Mubarak and Tran, Son and Hamid, Raffay and Chilimbi, Trishul and Shrivastava, Abhinav},
  title     = {{CoLLM}: A Large Language Model for Composed Image Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3994--4004},
}
Anomize: Better Open Vocabulary Video Anomaly Detection: Fei Li,

Wenxuan Liu,

Jingjing Chen,

Ruixu Zhang,

Yuran Wang,

Xian Zhong,

Zheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Fei and Liu, Wenxuan and Chen, Jingjing and Zhang, Ruixu and Wang, Yuran and Zhong, Xian and Wang, Zheng},
  title     = {{Anomize}: Better Open Vocabulary Video Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29203--29212},
}
Efficient Diffusion as Low Light Enhancer: Guanzhou Lan,

Qianli Ma,

Yuqi Yang,

Zhigang Wang,

Dong Wang,

Xuelong Li,

Bin Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lan_2025_CVPR,
  author    = {Lan, Guanzhou and Ma, Qianli and Yang, Yuqi and Wang, Zhigang and Wang, Dong and Li, Xuelong and Zhao, Bin},
  title     = {Efficient Diffusion as Low Light Enhancer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21277--21286},
}
GraphMimic: Graph-to-Graphs Generative Modeling from Videos for Policy Learning: Guangyan Chen,

Te Cui,

Meiling Wang,

Chengcai Yang,

Mengxiao Hu,

Haoyang Lu,

Yao Mu,

Zicai Peng,

Tianxing Zhou,

Xinran Jiang,

Yi Yang,

Yufeng Yue; [pdf]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Guangyan and Cui, Te and Wang, Meiling and Yang, Chengcai and Hu, Mengxiao and Lu, Haoyang and Mu, Yao and Peng, Zicai and Zhou, Tianxing and Jiang, Xinran and Yang, Yi and Yue, Yufeng},
  title     = {{GraphMimic}: Graph-to-Graphs Generative Modeling from Videos for Policy Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1756--1768},
}
VI^3NR: Variance Informed Initialization for Implicit Neural Representations: Chamin Hewa Koneputugodage,

Yizhak Ben-Shabat,

Sameera Ramasinghe,

Stephen Gould; [pdf] [supp]
[bibtex]
@InProceedings{Koneputugodage_2025_CVPR,
  author    = {Koneputugodage, Chamin Hewa and Ben-Shabat, Yizhak and Ramasinghe, Sameera and Gould, Stephen},
  title     = {{VI$^3$NR}: Variance Informed Initialization for Implicit Neural Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13477--13486},
}
MMVU: Measuring Expert-Level Multi-Discipline Video Understanding: Yilun Zhao,

Haowei Zhang,

Lujing Xie,

Tongyan Hu,

Guo Gan,

Yitao Long,

Zhiyuan Hu,

Weiyuan Chen,

Chuhan Li,

Zhijian Xu,

Chengye Wang,

Ziyao Shangguan,

Zhenwen Liang,

Yixin Liu,

Chen Zhao,

Arman Cohan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Yilun and Zhang, Haowei and Xie, Lujing and Hu, Tongyan and Gan, Guo and Long, Yitao and Hu, Zhiyuan and Chen, Weiyuan and Li, Chuhan and Xu, Zhijian and Wang, Chengye and Shangguan, Ziyao and Liang, Zhenwen and Liu, Yixin and Zhao, Chen and Cohan, Arman},
  title     = {{MMVU}: Measuring Expert-Level Multi-Discipline Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8475--8489},
}
M-LLM Based Video Frame Selection for Efficient Video Understanding: Kai Hu,

Feng Gao,

Xiaohan Nie,

Peng Zhou,

Son Tran,

Tal Neiman,

Lingyun Wang,

Mubarak Shah,

Raffay Hamid,

Bing Yin,

Trishul Chilimbi; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Kai and Gao, Feng and Nie, Xiaohan and Zhou, Peng and Tran, Son and Neiman, Tal and Wang, Lingyun and Shah, Mubarak and Hamid, Raffay and Yin, Bing and Chilimbi, Trishul},
  title     = {{M-LLM} Based Video Frame Selection for Efficient Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13702--13712},
}
Search and Detect: Training-Free Long Tail Object Detection via Web-Image Retrieval: Mankeerat Sidhu,

Hetarth Chopra,

Ansel Blume,

Jeonghwan Kim,

Revanth Gangi Reddy,

Heng Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sidhu_2025_CVPR,
  author    = {Sidhu, Mankeerat and Chopra, Hetarth and Blume, Ansel and Kim, Jeonghwan and Reddy, Revanth Gangi and Ji, Heng},
  title     = {Search and Detect: Training-Free Long Tail Object Detection via Web-Image Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15129--15138},
}
EgoLM: Multi-Modal Language Model of Egocentric Motions: Fangzhou Hong,

Vladimir Guzov,

Hyo Jin Kim,

Yuting Ye,

Richard Newcombe,

Ziwei Liu,

Lingni Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2025_CVPR,
  author    = {Hong, Fangzhou and Guzov, Vladimir and Kim, Hyo Jin and Ye, Yuting and Newcombe, Richard and Liu, Ziwei and Ma, Lingni},
  title     = {{EgoLM}: Multi-Modal Language Model of Egocentric Motions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5344--5354},
}
Unleashing the Potential of Multi-modal Foundation Models and Video Diffusion for 4D Dynamic Physical Scene Simulation: Zhuoman Liu,

Weicai Ye,

Yan Luximon,

Pengfei Wan,

Di Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhuoman and Ye, Weicai and Luximon, Yan and Wan, Pengfei and Zhang, Di},
  title     = {Unleashing the Potential of Multi-modal Foundation Models and Video Diffusion for {4D} Dynamic Physical Scene Simulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11016--11025},
}
Diffusion Model is Effectively Its Own Teacher: Xinyin Ma,

Runpeng Yu,

Songhua Liu,

Gongfan Fang,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Xinyin and Yu, Runpeng and Liu, Songhua and Fang, Gongfan and Wang, Xinchao},
  title     = {Diffusion Model is Effectively Its Own Teacher},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12901--12911},
}
Long Video Diffusion Generation with Segmented Cross-Attention and Content-Rich Video Data Curation: Xin Yan,

Yuxuan Cai,

Qiuyue Wang,

Yuan Zhou,

Wenhao Huang,

Huan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Xin and Cai, Yuxuan and Wang, Qiuyue and Zhou, Yuan and Huang, Wenhao and Yang, Huan},
  title     = {Long Video Diffusion Generation with Segmented Cross-Attention and Content-Rich Video Data Curation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3184--3194},
}
Learning Heterogeneous Tissues with Mixture of Experts for Gigapixel Whole Slide Images: Junxian Wu,

Minheng Chen,

Xinyi Ke,

Tianwang Xun,

Xiaoming Jiang,

Hongyu Zhou,

Lizhi Shao,

Youyong Kong; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Junxian and Chen, Minheng and Ke, Xinyi and Xun, Tianwang and Jiang, Xiaoming and Zhou, Hongyu and Shao, Lizhi and Kong, Youyong},
  title     = {Learning Heterogeneous Tissues with Mixture of Experts for Gigapixel Whole Slide Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5144--5153},
}
HyperNVD: Accelerating Neural Video Decomposition via Hypernetworks: Maria Pilligua,

Danna Xue,

Javier Vazquez-Corral; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pilligua_2025_CVPR,
  author    = {Pilligua, Maria and Xue, Danna and Vazquez-Corral, Javier},
  title     = {{HyperNVD}: Accelerating Neural Video Decomposition via Hypernetworks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22933--22942},
}
UnCommon Objects in 3D: Xingchen Liu,

Piyush Tayal,

Jianyuan Wang,

Jesus Zarzar,

Tom Monnier,

Konstantinos Tertikas,

Jiali Duan,

Antoine Toisoul,

Jason Y. Zhang,

Natalia Neverova,

Andrea Vedaldi,

Roman Shapovalov,

David Novotny; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xingchen and Tayal, Piyush and Wang, Jianyuan and Zarzar, Jesus and Monnier, Tom and Tertikas, Konstantinos and Duan, Jiali and Toisoul, Antoine and Zhang, Jason Y. and Neverova, Natalia and Vedaldi, Andrea and Shapovalov, Roman and Novotny, David},
  title     = {{UnCommon} Objects in {3D}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14102--14113},
}
Disentangled Pose and Appearance Guidance for Multi-Pose Generation: Tengfei Xiao,

Yue Wu,

Yuelong Li,

Can Qin,

Maoguo Gong,

Qiguang Miao,

Wenping Ma; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Tengfei and Wu, Yue and Li, Yuelong and Qin, Can and Gong, Maoguo and Miao, Qiguang and Ma, Wenping},
  title     = {Disentangled Pose and Appearance Guidance for Multi-Pose Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5646--5655},
}
Mind the Gap: Confidence Discrepancy Can Guide Federated Semi-Supervised Learning Across Pseudo-Mismatch: Yijie Liu,

Xinyi Shang,

Yiqun Zhang,

Yang Lu,

Chen Gong,

Jing-Hao Xue,

Hanzi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yijie and Shang, Xinyi and Zhang, Yiqun and Lu, Yang and Gong, Chen and Xue, Jing-Hao and Wang, Hanzi},
  title     = {Mind the Gap: Confidence Discrepancy Can Guide Federated Semi-Supervised Learning Across Pseudo-Mismatch},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10173--10182},
}
Instant Adversarial Purification with Adversarial Consistency Distillation: Chun Tong Lei,

Hon Ming Yam,

Zhongliang Guo,

Yifei Qian,

Chun Pong Lau; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2025_CVPR,
  author    = {Lei, Chun Tong and Yam, Hon Ming and Guo, Zhongliang and Qian, Yifei and Lau, Chun Pong},
  title     = {Instant Adversarial Purification with Adversarial Consistency Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24331--24340},
}
Learning Textual Prompts for Open-World Semi-Supervised Learning: Yuxin Fan,

Junbiao Cui,

Jiye Liang; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Yuxin and Cui, Junbiao and Liang, Jiye},
  title     = {Learning Textual Prompts for Open-World Semi-Supervised Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14756--14765},
}
Electromyography-Informed Facial Expression Reconstruction for Physiological-Based Synthesis and Analysis: Tim Büchner,

Christoph Anders,

Orlando Guntinas-Lichius,

Joachim Denzler; [pdf] [supp]
[bibtex]
@InProceedings{Buchner_2025_CVPR,
  author    = {B{\"u}chner, Tim and Anders, Christoph and Guntinas-Lichius, Orlando and Denzler, Joachim},
  title     = {Electromyography-Informed Facial Expression Reconstruction for Physiological-Based Synthesis and Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {215--227},
}
LongDiff: Training-Free Long Video Generation in One Go: Zhuoling Li,

Hossein Rahmani,

Qiuhong Ke,

Jun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zhuoling and Rahmani, Hossein and Ke, Qiuhong and Liu, Jun},
  title     = {{LongDiff}: Training-Free Long Video Generation in One Go},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17789--17798},
}
Feature Selection for Latent Factor Models: Rittwika Kansabanik,

Adrian Barbu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kansabanik_2025_CVPR,
  author    = {Kansabanik, Rittwika and Barbu, Adrian},
  title     = {Feature Selection for Latent Factor Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30742--30751},
}
Preserve or Modify? Context-Aware Evaluation for Balancing Preservation and Modification in Text-Guided Image Editing: Yoonjeon Kim,

Soohyun Ryu,

Yeonsung Jung,

Hyunkoo Lee,

Joowon Kim,

June Yong Yang,

Jaeryong Hwang,

Eunho Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Yoonjeon and Ryu, Soohyun and Jung, Yeonsung and Lee, Hyunkoo and Kim, Joowon and Yang, June Yong and Hwang, Jaeryong and Yang, Eunho},
  title     = {Preserve or Modify? Context-Aware Evaluation for Balancing Preservation and Modification in Text-Guided Image Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23474--23483},
}
Mask-Adapter: The Devil is in the Masks for Open-Vocabulary Segmentation: Yongkang Li,

Tianheng Cheng,

Bin Feng,

Wenyu Liu,

Xinggang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yongkang and Cheng, Tianheng and Feng, Bin and Liu, Wenyu and Wang, Xinggang},
  title     = {{Mask-Adapter}: The Devil is in the Masks for Open-Vocabulary Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14998--15008},
}
MPDrive: Improving Spatial Understanding with Marker-Based Prompt Learning for Autonomous Driving: Zhiyuan Zhang,

Xiaofan Li,

Zhihao Xu,

Wenjie Peng,

Zijian Zhou,

Miaojing Shi,

Shuangping Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhiyuan and Li, Xiaofan and Xu, Zhihao and Peng, Wenjie and Zhou, Zijian and Shi, Miaojing and Huang, Shuangping},
  title     = {{MPDrive}: Improving Spatial Understanding with Marker-Based Prompt Learning for Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12089--12099},
}
Improving the Transferability of Adversarial Attacks on Face Recognition with Diverse Parameters Augmentation: Fengfan Zhou,

Bangjie Yin,

Hefei Ling,

Qianyu Zhou,

Wenxuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Fengfan and Yin, Bangjie and Ling, Hefei and Zhou, Qianyu and Wang, Wenxuan},
  title     = {Improving the Transferability of Adversarial Attacks on Face Recognition with Diverse Parameters Augmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3516--3527},
}
Adapting to Observation Length of Trajectory Prediction via Contrastive Learning: Ruiqi Qiu,

Jun Gong,

Xinyu Zhang,

Siqi Luo,

Bowen Zhang,

Yi Cen; [pdf]
[bibtex]
@InProceedings{Qiu_2025_CVPR,
  author    = {Qiu, Ruiqi and Gong, Jun and Zhang, Xinyu and Luo, Siqi and Zhang, Bowen and Cen, Yi},
  title     = {Adapting to Observation Length of Trajectory Prediction via Contrastive Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1645--1654},
}
Fine-Grained Image-Text Correspondence with Cost Aggregation for Open-Vocabulary Part Segmentation: Jiho Choi,

Seonho Lee,

Minhyun Lee,

Seungho Lee,

Hyunjung Shim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2025_CVPR,
  author    = {Choi, Jiho and Lee, Seonho and Lee, Minhyun and Lee, Seungho and Shim, Hyunjung},
  title     = {Fine-Grained Image-Text Correspondence with Cost Aggregation for Open-Vocabulary Part Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9782--9793},
}
NitroFusion: High-Fidelity Single-Step Diffusion through Dynamic Adversarial Training: Dar-Yen Chen,

Hmrishav Bandyopadhyay,

Kai Zou,

Yi-Zhe Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Dar-Yen and Bandyopadhyay, Hmrishav and Zou, Kai and Song, Yi-Zhe},
  title     = {{NitroFusion}: High-Fidelity Single-Step Diffusion through Dynamic Adversarial Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7654--7663},
}
ByTheWay: Boost Your Text-to-Video Generation Model to Higher Quality in a Training-free Way: Jiazi Bu,

Pengyang Ling,

Pan Zhang,

Tong Wu,

Xiaoyi Dong,

Yuhang Zang,

Yuhang Cao,

Dahua Lin,

Jiaqi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bu_2025_CVPR,
  author    = {Bu, Jiazi and Ling, Pengyang and Zhang, Pan and Wu, Tong and Dong, Xiaoyi and Zang, Yuhang and Cao, Yuhang and Lin, Dahua and Wang, Jiaqi},
  title     = {{ByTheWay}: Boost Your Text-to-Video Generation Model to Higher Quality in a Training-free Way},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12999--13008},
}
CMMLoc: Advancing Text-to-PointCloud Localization with Cauchy-Mixture-Model Based Framework: Yanlong Xu,

Haoxuan Qu,

Jun Liu,

Wenxiao Zhang,

Xun Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Yanlong and Qu, Haoxuan and Liu, Jun and Zhang, Wenxiao and Yang, Xun},
  title     = {{CMMLoc}: Advancing Text-to-{PointCloud} Localization with {Cauchy}-Mixture-Model Based Framework},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6637--6647},
}
Masked Point-Entity Contrast for Open-Vocabulary 3D Scene Understanding: Yan Wang,

Baoxiong Jia,

Ziyu Zhu,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yan and Jia, Baoxiong and Zhu, Ziyu and Huang, Siyuan},
  title     = {Masked Point-Entity Contrast for Open-Vocabulary {3D} Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14125--14136},
}
Decoupling Training-Free Guided Diffusion by ADMM: Youyuan Zhang,

Zehua Liu,

Zenan Li,

Zhaoyu Li,

James J. Clark,

Xujie Si; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Youyuan and Liu, Zehua and Li, Zenan and Li, Zhaoyu and Clark, James J. and Si, Xujie},
  title     = {Decoupling Training-Free Guided Diffusion by {ADMM}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23292--23302},
}
On the Generalization of Handwritten Text Recognition Models: Carlos Garrido-Munoz,

Jorge Calvo-Zaragoza; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Garrido-Munoz_2025_CVPR,
  author    = {Garrido-Munoz, Carlos and Calvo-Zaragoza, Jorge},
  title     = {On the Generalization of Handwritten Text Recognition Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15275--15286},
}
SwiftEdit: Lightning Fast Text-Guided Image Editing via One-Step Diffusion: Trong-Tung Nguyen,

Quang Nguyen,

Khoi Nguyen,

Anh Tran,

Cuong Pham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Trong-Tung and Nguyen, Quang and Nguyen, Khoi and Tran, Anh and Pham, Cuong},
  title     = {{SwiftEdit}: Lightning Fast Text-Guided Image Editing via One-Step Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21492--21501},
}
Learning from Synchronization: Self-Supervised Uncalibrated Multi-View Person Association in Challenging Scenes: Keqi Chen,

Vinkle Srivastav,

Didier Mutter,

Nicolas Padoy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Keqi and Srivastav, Vinkle and Mutter, Didier and Padoy, Nicolas},
  title     = {Learning from Synchronization: Self-Supervised Uncalibrated Multi-View Person Association in Challenging Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24419--24428},
}
RC-AutoCalib: An End-to-End Radar-Camera Automatic Calibration Network: Van-Tin Luu,

Yon-Lin Cai,

Vu-Hoang Tran,

Wei-Chen Chiu,

Yi-Ting Chen,

Ching-Chun Huang; [pdf] [supp]
[bibtex]
@InProceedings{Luu_2025_CVPR,
  author    = {Luu, Van-Tin and Cai, Yon-Lin and Tran, Vu-Hoang and Chiu, Wei-Chen and Chen, Yi-Ting and Huang, Ching-Chun},
  title     = {{RC-AutoCalib}: An End-to-End Radar-Camera Automatic Calibration Network},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6700--6709},
}
Argus: A Compact and Versatile Foundation Model for Vision: Weiming Zhuang,

Chen Chen,

Zhizhong Li,

Sina Sajadmanesh,

Jingtao Li,

Jiabo Huang,

Vikash Sehwag,

Vivek Sharma,

Hirotaka Shinozaki,

Felan Carlo Garcia,

Yihao Zhan,

Naohiro Adachi,

Ryoji Eki,

Michael Spranger,

Peter Stone,

Lingjuan Lyu; [pdf] [supp]
[bibtex]
@InProceedings{Zhuang_2025_CVPR,
  author    = {Zhuang, Weiming and Chen, Chen and Li, Zhizhong and Sajadmanesh, Sina and Li, Jingtao and Huang, Jiabo and Sehwag, Vikash and Sharma, Vivek and Shinozaki, Hirotaka and Garcia, Felan Carlo and Zhan, Yihao and Adachi, Naohiro and Eki, Ryoji and Spranger, Michael and Stone, Peter and Lyu, Lingjuan},
  title     = {{Argus}: A Compact and Versatile Foundation Model for Vision},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4418--4429},
}
CLIP-driven Coarse-to-fine Semantic Guidance for Fine-grained Open-set Semi-supervised Learning: Xiaokun Li,

Yaping Huang,

Qingji Guan; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xiaokun and Huang, Yaping and Guan, Qingji},
  title     = {{CLIP}-driven Coarse-to-fine Semantic Guidance for Fine-grained Open-set Semi-supervised Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30312--30321},
}
InsTaG: Learning Personalized 3D Talking Head from Few-Second Video: Jiahe Li,

Jiawei Zhang,

Xiao Bai,

Jin Zheng,

Jun Zhou,

Lin Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jiahe and Zhang, Jiawei and Bai, Xiao and Zheng, Jin and Zhou, Jun and Gu, Lin},
  title     = {{InsTaG}: Learning Personalized {3D} Talking Head from Few-Second Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10690--10700},
}
Sampling Innovation-Based Adaptive Compressive Sensing: Zhifu Tian,

Tao Hu,

Chaoyang Niu,

Di Wu,

Shu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Zhifu and Hu, Tao and Niu, Chaoyang and Wu, Di and Wang, Shu},
  title     = {Sampling Innovation-Based Adaptive Compressive Sensing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2387--2397},
}
A Simple Data Augmentation for Feature Distribution Skewed Federated Learning: Yunlu Yan,

Huazhu Fu,

Yuexiang Li,

Jinheng Xie,

Jun Ma,

Guang Yang,

Lei Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Yunlu and Fu, Huazhu and Li, Yuexiang and Xie, Jinheng and Ma, Jun and Yang, Guang and Zhu, Lei},
  title     = {A Simple Data Augmentation for Feature Distribution Skewed Federated Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25749--25758},
}
MotionBench: Benchmarking and Improving Fine-grained Video Motion Understanding for Vision Language Models: Wenyi Hong,

Yean Cheng,

Zhuoyi Yang,

Weihan Wang,

Lefan Wang,

Xiaotao Gu,

Shiyu Huang,

Yuxiao Dong,

Jie Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2025_CVPR,
  author    = {Hong, Wenyi and Cheng, Yean and Yang, Zhuoyi and Wang, Weihan and Wang, Lefan and Gu, Xiaotao and Huang, Shiyu and Dong, Yuxiao and Tang, Jie},
  title     = {{MotionBench}: Benchmarking and Improving Fine-grained Video Motion Understanding for Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8450--8460},
}
Benchmarking Large Vision-Language Models via Directed Scene Graph for Comprehensive Image Captioning: Fan Lu,

Wei Wu,

Kecheng Zheng,

Shuailei Ma,

Biao Gong,

Jiawei Liu,

Wei Zhai,

Yang Cao,

Yujun Shen,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Fan and Wu, Wei and Zheng, Kecheng and Ma, Shuailei and Gong, Biao and Liu, Jiawei and Zhai, Wei and Cao, Yang and Shen, Yujun and Zha, Zheng-Jun},
  title     = {Benchmarking Large Vision-Language Models via Directed Scene Graph for Comprehensive Image Captioning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19618--19627},
}
ART: Anonymous Region Transformer for Variable Multi-Layer Transparent Image Generation: Yifan Pu,

Yiming Zhao,

Zhicong Tang,

Ruihong Yin,

Haoxing Ye,

Yuhui Yuan,

Dong Chen,

Jianmin Bao,

Sirui Zhang,

Yanbin Wang,

Lin Liang,

Lijuan Wang,

Ji Li,

Xiu Li,

Zhouhui Lian,

Gao Huang,

Baining Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pu_2025_CVPR,
  author    = {Pu, Yifan and Zhao, Yiming and Tang, Zhicong and Yin, Ruihong and Ye, Haoxing and Yuan, Yuhui and Chen, Dong and Bao, Jianmin and Zhang, Sirui and Wang, Yanbin and Liang, Lin and Wang, Lijuan and Li, Ji and Li, Xiu and Lian, Zhouhui and Huang, Gao and Guo, Baining},
  title     = {{ART}: Anonymous Region Transformer for Variable Multi-Layer Transparent Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7952--7962},
}
Rotation-Equivariant Self-Supervised Method in Image Denoising: Hanze Liu,

Jiahong Fu,

Qi Xie,

Deyu Meng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Hanze and Fu, Jiahong and Xie, Qi and Meng, Deyu},
  title     = {Rotation-Equivariant Self-Supervised Method in Image Denoising},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12720--12730},
}
ArcPro: Architectural Programs for Structured 3D Abstraction of Sparse Points: Qirui Huang,

Runze Zhang,

Kangjun Liu,

Minglun Gong,

Hao Zhang,

Hui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Qirui and Zhang, Runze and Liu, Kangjun and Gong, Minglun and Zhang, Hao and Huang, Hui},
  title     = {{ArcPro}: Architectural Programs for Structured {3D} Abstraction of Sparse Points},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6563--6572},
}
GLane3D: Detecting Lanes with Graph of 3D Keypoints: Halil İbrahim Öztürk,

Muhammet Esat Kalfaoğlu,

Ozsel Kilinc; [pdf] [supp]
[bibtex]
@InProceedings{Ozturk_2025_CVPR,
  author    = {{\"O}zt{\"u}rk, Halil {\.I}brahim and Kalfao{\u{g}}lu, Muhammet Esat and Kilinc, Ozsel},
  title     = {{GLane3D}: Detecting Lanes with Graph of {3D} Keypoints},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27508--27518},
}
Minimal Interaction Seperated Tuning: A New Paradigm for Visual Adaptation: Ningyuan Tang,

Minghao Fu,

Jianxin Wu; [pdf]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Ningyuan and Fu, Minghao and Wu, Jianxin},
  title     = {Minimal Interaction Seperated Tuning: A New Paradigm for Visual Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25208--25217},
}
Hardware-Rasterized Ray-Based Gaussian Splatting: Samuel Rota Bulò,

Nemanja Bartolovic,

Lorenzo Porzi,

Peter Kontschieder; [pdf] [supp]
[bibtex]
@InProceedings{Bulo_2025_CVPR,
  author    = {Bul{\`o}, Samuel Rota and Bartolovic, Nemanja and Porzi, Lorenzo and Kontschieder, Peter},
  title     = {Hardware-Rasterized Ray-Based {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {485--494},
}
FlashSloth : Lightning Multimodal Large Language Models via Embedded Visual Compression: Bo Tong,

Bokai Lai,

Yiyi Zhou,

Gen Luo,

Yunhang Shen,

Ke Li,

Xiaoshuai Sun,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tong_2025_CVPR,
  author    = {Tong, Bo and Lai, Bokai and Zhou, Yiyi and Luo, Gen and Shen, Yunhang and Li, Ke and Sun, Xiaoshuai and Ji, Rongrong},
  title     = {{FlashSloth} : Lightning Multimodal Large Language Models via Embedded Visual Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14570--14581},
}
FreqDebias: Towards Generalizable Deepfake Detection via Consistency-Driven Frequency Debiasing: Hossein Kashiani,

Niloufar Alipour Talemi,

Fatemeh Afghah; [pdf] [supp]
[bibtex]
@InProceedings{Kashiani_2025_CVPR,
  author    = {Kashiani, Hossein and Talemi, Niloufar Alipour and Afghah, Fatemeh},
  title     = {{FreqDebias}: Towards Generalizable Deepfake Detection via Consistency-Driven Frequency Debiasing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8775--8785},
}
Multi-subject Open-set Personalization in Video Generation: Tsai-Shien Chen,

Aliaksandr Siarohin,

Willi Menapace,

Yuwei Fang,

Kwot Sin Lee,

Ivan Skorokhodov,

Kfir Aberman,

Jun-Yan Zhu,

Ming-Hsuan Yang,

Sergey Tulyakov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Tsai-Shien and Siarohin, Aliaksandr and Menapace, Willi and Fang, Yuwei and Lee, Kwot Sin and Skorokhodov, Ivan and Aberman, Kfir and Zhu, Jun-Yan and Yang, Ming-Hsuan and Tulyakov, Sergey},
  title     = {Multi-subject Open-set Personalization in Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6099--6110},
}
Wav2Sem: Plug-and-Play Audio Semantic Decoupling for 3D Speech-Driven Facial Animation: Hao Li,

Ju Dai,

Xin Zhao,

Feng Zhou,

Junjun Pan,

Lei Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Hao and Dai, Ju and Zhao, Xin and Zhou, Feng and Pan, Junjun and Li, Lei},
  title     = {{Wav2Sem}: Plug-and-Play Audio Semantic Decoupling for {3D} Speech-Driven Facial Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {183--192},
}
Attraction Diminishing and Distributing for Few-Shot Class-Incremental Learning: Li-Jun Zhao,

Zhen-Duo Chen,

Yongxin Wang,

Xin Luo,

Xin-Shun Xu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Li-Jun and Chen, Zhen-Duo and Wang, Yongxin and Luo, Xin and Xu, Xin-Shun},
  title     = {Attraction Diminishing and Distributing for Few-Shot Class-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25657--25666},
}
4DTAM: Non-Rigid Tracking and Mapping via Dynamic Surface Gaussians: Hidenobu Matsuki,

Gwangbin Bae,

Andrew J. Davison; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Matsuki_2025_CVPR,
  author    = {Matsuki, Hidenobu and Bae, Gwangbin and Davison, Andrew J.},
  title     = {{4DTAM}: Non-Rigid Tracking and Mapping via Dynamic Surface {Gaussians}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26921--26932},
}
T2SG: Traffic Topology Scene Graph for Topology Reasoning in Autonomous Driving: Changsheng Lv,

Mengshi Qi,

Liang Liu,

Huadong Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Lv_2025_CVPR,
  author    = {Lv, Changsheng and Qi, Mengshi and Liu, Liang and Ma, Huadong},
  title     = {{T2SG}: Traffic Topology Scene Graph for Topology Reasoning in Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17197--17206}
}
Unseen Visual Anomaly Generation: Han Sun,

Yunkang Cao,

Hao Dong,

Olga Fink; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Han and Cao, Yunkang and Dong, Hao and Fink, Olga},
  title     = {Unseen Visual Anomaly Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25508--25517}
}
T2ICount: Enhancing Cross-modal Understanding for Zero-Shot Counting: Yifei Qian,

Zhongliang Guo,

Bowen Deng,

Chun Tong Lei,

Shuai Zhao,

Chun Pong Lau,

Xiaopeng Hong,

Michael P. Pound; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qian_2025_CVPR,
  author    = {Qian, Yifei and Guo, Zhongliang and Deng, Bowen and Lei, Chun Tong and Zhao, Shuai and Lau, Chun Pong and Hong, Xiaopeng and Pound, Michael P.},
  title     = {{T2ICount}: Enhancing Cross-modal Understanding for Zero-Shot Counting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25336--25345}
}
RealEdit: Reddit Edits As a Large-scale Empirical Dataset for Image Transformations: Peter Sushko,

Ayana Bharadwaj,

Zhi Yang Lim,

Vasily Ilin,

Ben Caffee,

Dongping Chen,

Mohammadreza Salehi,

Cheng-Yu Hsieh,

Ranjay Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sushko_2025_CVPR,
  author    = {Sushko, Peter and Bharadwaj, Ayana and Lim, Zhi Yang and Ilin, Vasily and Caffee, Ben and Chen, Dongping and Salehi, Mohammadreza and Hsieh, Cheng-Yu and Krishna, Ranjay},
  title     = {{RealEdit}: {Reddit} Edits As a Large-scale Empirical Dataset for Image Transformations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13403--13413}
}
VideoScene: Distilling Video Diffusion Model to Generate 3D Scenes in One Step: Hanyang Wang,

Fangfu Liu,

Jiawei Chi,

Yueqi Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Hanyang and Liu, Fangfu and Chi, Jiawei and Duan, Yueqi},
  title     = {{VideoScene}: Distilling Video Diffusion Model to Generate {3D} Scenes in One Step},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16475--16485}
}
3D-HGS: 3D Half-Gaussian Splatting: Haolin Li,

Jinyang Liu,

Mario Sznaier,

Octavia Camps; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Haolin and Liu, Jinyang and Sznaier, Mario and Camps, Octavia},
  title     = {{3D-HGS}: {3D} Half-{Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10996--11005}
}
FG^2: Fine-Grained Cross-View Localization by Fine-Grained Feature Matching: Zimin Xia,

Alexandre Alahi; [pdf] [supp]
[bibtex]
@InProceedings{Xia_2025_CVPR,
  author    = {Xia, Zimin and Alahi, Alexandre},
  title     = {{FG{\textasciicircum}2}: Fine-Grained Cross-View Localization by Fine-Grained Feature Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6362--6372}
}
ReNeg: Learning Negative Embedding with Reward Guidance: Xiaomin Li,

Yixuan Liu,

Takashi Isobe,

Xu Jia,

Qinpeng Cui,

Dong Zhou,

Dong Li,

You He,

Huchuan Lu,

Zhongdao Wang,

Emad Barsoum; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xiaomin and Liu, Yixuan and Isobe, Takashi and Jia, Xu and Cui, Qinpeng and Zhou, Dong and Li, Dong and He, You and Lu, Huchuan and Wang, Zhongdao and Barsoum, Emad},
  title     = {{ReNeg}: Learning Negative Embedding with Reward Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23636--23645}
}
Scale Efficient Training for Large Datasets: Qing Zhou,

Junyu Gao,

Qi Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Qing and Gao, Junyu and Wang, Qi},
  title     = {Scale Efficient Training for Large Datasets},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20458--20467}
}
Distilled Prompt Learning for Incomplete Multimodal Survival Prediction: Yingxue Xu,

Fengtao Zhou,

Chenyu Zhao,

Yihui Wang,

Can Yang,

Hao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Yingxue and Zhou, Fengtao and Zhao, Chenyu and Wang, Yihui and Yang, Can and Chen, Hao},
  title     = {Distilled Prompt Learning for Incomplete Multimodal Survival Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5102--5111}
}
Decoder Gradient Shield: Provable and High-Fidelity Prevention of Gradient-Based Box-Free Watermark Removal: Haonan An,

Guang Hua,

Zhengru Fang,

Guowen Xu,

Susanto Rahardja,

Yuguang Fang; [pdf] [arXiv]
[bibtex]
@InProceedings{An_2025_CVPR,
  author    = {An, Haonan and Hua, Guang and Fang, Zhengru and Xu, Guowen and Rahardja, Susanto and Fang, Yuguang},
  title     = {Decoder Gradient Shield: Provable and High-Fidelity Prevention of Gradient-Based Box-Free Watermark Removal},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13424--13433}
}
MotionPro: A Precise Motion Controller for Image-to-Video Generation: Zhongwei Zhang,

Fuchen Long,

Zhaofan Qiu,

Yingwei Pan,

Wu Liu,

Ting Yao,

Tao Mei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhongwei and Long, Fuchen and Qiu, Zhaofan and Pan, Yingwei and Liu, Wu and Yao, Ting and Mei, Tao},
  title     = {{MotionPro}: A Precise Motion Controller for Image-to-Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27957--27967}
}
Goku: Flow Based Video Generative Foundation Models: Shoufa Chen,

Chongjian Ge,

Yuqi Zhang,

Yida Zhang,

Fengda Zhu,

Hao Yang,

Hongxiang Hao,

Hui Wu,

Zhichao Lai,

Yifei Hu,

Ting-Che Lin,

Shilong Zhang,

Fu Li,

Chuan Li,

Xing Wang,

Yanghua Peng,

Peize Sun,

Ping Luo,

Yi Jiang,

Zehuan Yuan,

Bingyue Peng,

Xiaobing Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Shoufa and Ge, Chongjian and Zhang, Yuqi and Zhang, Yida and Zhu, Fengda and Yang, Hao and Hao, Hongxiang and Wu, Hui and Lai, Zhichao and Hu, Yifei and Lin, Ting-Che and Zhang, Shilong and Li, Fu and Li, Chuan and Wang, Xing and Peng, Yanghua and Sun, Peize and Luo, Ping and Jiang, Yi and Yuan, Zehuan and Peng, Bingyue and Liu, Xiaobing},
  title     = {Goku: Flow Based Video Generative Foundation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23516--23527}
}
Learning Conditional Space-Time Prompt Distributions for Video Class-Incremental Learning: Xiaohan Zou,

Wenchao Ma,

Shu Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Zou_2025_CVPR,
  author    = {Zou, Xiaohan and Ma, Wenchao and Zhao, Shu},
  title     = {Learning Conditional Space-Time Prompt Distributions for Video Class-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4862--4873}
}
Convex Combination Star Shape Prior for Data-driven Image Semantic Segmentation: Xinyu Zhao,

Jun Xie,

Shengzhe Chen,

Jun Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Xinyu and Xie, Jun and Chen, Shengzhe and Liu, Jun},
  title     = {Convex Combination Star Shape Prior for Data-driven Image Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14068--14077}
}
Hyperbolic Safety-Aware Vision-Language Models: Tobia Poppi,

Tejaswi Kasarla,

Pascal Mettes,

Lorenzo Baraldi,

Rita Cucchiara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Poppi_2025_CVPR,
  author    = {Poppi, Tobia and Kasarla, Tejaswi and Mettes, Pascal and Baraldi, Lorenzo and Cucchiara, Rita},
  title     = {Hyperbolic Safety-Aware Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4222--4232}
}
WISH: Weakly Supervised Instance Segmentation using Heterogeneous Labels: Hyeokjun Kweon,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Kweon_2025_CVPR,
  author    = {Kweon, Hyeokjun and Yoon, Kuk-Jin},
  title     = {{WISH}: Weakly Supervised Instance Segmentation using Heterogeneous Labels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25377--25387}
}
SinGS: Animatable Single-Image Human Gaussian Splats with Kinematic Priors: Yufan Wu,

Xuanhong Chen,

Wen Li,

Shunran Jia,

Hualiang Wei,

Kairui Feng,

Jialiang Chen,

Yuhan Li,

Ang He,

Weimin Zhang,

Bingbing Ni,

Wenjun Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Yufan and Chen, Xuanhong and Li, Wen and Jia, Shunran and Wei, Hualiang and Feng, Kairui and Chen, Jialiang and Li, Yuhan and He, Ang and Zhang, Weimin and Ni, Bingbing and Zhang, Wenjun},
  title     = {{SinGS}: Animatable Single-Image Human {Gaussian} Splats with Kinematic Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5571--5580}
}
Parameter-efficient Fine-tuning in Hyperspherical Space for Open-vocabulary Semantic Segmentation: Zelin Peng,

Zhengqin Xu,

Zhilin Zeng,

Yu Huang,

Yaoming Wang,

Wei Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_CVPR,
  author    = {Peng, Zelin and Xu, Zhengqin and Zeng, Zhilin and Huang, Yu and Wang, Yaoming and Shen, Wei},
  title     = {Parameter-efficient Fine-tuning in Hyperspherical Space for Open-vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15009--15020}
}
Relative Pose Estimation through Affine Corrections of Monocular Depth Priors: Yifan Yu,

Shaohui Liu,

Rémi Pautrat,

Marc Pollefeys,

Viktor Larsson; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Yifan and Liu, Shaohui and Pautrat, R{\'e}mi and Pollefeys, Marc and Larsson, Viktor},
  title     = {Relative Pose Estimation through Affine Corrections of Monocular Depth Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16706--16716}
}
Zero-1-to-A: Zero-Shot One Image to Animatable Head Avatars Using Video Diffusion: Zhenglin Zhou,

Fan Ma,

Hehe Fan,

Tat-Seng Chua; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Zhenglin and Ma, Fan and Fan, Hehe and Chua, Tat-Seng},
  title     = {{Zero-1-to-A}: Zero-Shot One Image to Animatable Head Avatars Using Video Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15941--15952}
}
Occlusion-aware Text-Image-Point Cloud Pretraining for Open-World 3D Object Recognition: Khanh Nguyen,

Ghulam Mubashar Hassan,

Ajmal Mian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Khanh and Hassan, Ghulam Mubashar and Mian, Ajmal},
  title     = {Occlusion-aware Text-Image-Point Cloud Pretraining for Open-World {3D} Object Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16965--16975}
}
Conical Visual Concentration for Efficient Large Vision-Language Models: Long Xing,

Qidong Huang,

Xiaoyi Dong,

Jiajie Lu,

Pan Zhang,

Yuhang Zang,

Yuhang Cao,

Conghui He,

Jiaqi Wang,

Feng Wu,

Dahua Lin; [pdf] [supp]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Long and Huang, Qidong and Dong, Xiaoyi and Lu, Jiajie and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and He, Conghui and Wang, Jiaqi and Wu, Feng and Lin, Dahua},
  title     = {Conical Visual Concentration for Efficient Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14593--14603}
}
Good, Cheap, and Fast: Overfitted Image Compression with Wasserstein Distortion: Jona Ballé,

Luca Versari,

Emilien Dupont,

Hyunjik Kim,

Matthias Bauer; [pdf] [supp]
[bibtex]
@InProceedings{Balle_2025_CVPR,
  author    = {Ball{\'e}, Jona and Versari, Luca and Dupont, Emilien and Kim, Hyunjik and Bauer, Matthias},
  title     = {Good, Cheap, and Fast: Overfitted Image Compression with {Wasserstein} Distortion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23259--23268}
}
Period-LLM: Extending the Periodic Capability of Multimodal Large Language Model: Yuting Zhang,

Hao Lu,

Qingyong Hu,

Yin Wang,

Kaishen Yuan,

Xin Liu,

Kaishun Wu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yuting and Lu, Hao and Hu, Qingyong and Wang, Yin and Yuan, Kaishen and Liu, Xin and Wu, Kaishun},
  title     = {{Period-LLM}: Extending the Periodic Capability of Multimodal Large Language Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29237--29247}
}
V2X-R: Cooperative LiDAR-4D Radar Fusion with Denoising Diffusion for 3D Object Detection: Xun Huang,

Jinlong Wang,

Qiming Xia,

Siheng Chen,

Bisheng Yang,

Xin Li,

Cheng Wang,

Chenglu Wen; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Xun and Wang, Jinlong and Xia, Qiming and Chen, Siheng and Yang, Bisheng and Li, Xin and Wang, Cheng and Wen, Chenglu},
  title     = {{V2X-R}: Cooperative {LiDAR-4D} Radar Fusion with Denoising Diffusion for {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27390--27400}
}
Multi-Modal Synergistic Implicit Image Enhancement for Efficient Optical Flow Estimation: Weichen Dai,

Hexing Wu,

Xiaoyang Weng,

Yuxin Zheng,

Yuhang Ming,

Wanzeng Kong; [pdf]
[bibtex]
@InProceedings{Dai_2025_CVPR,
  author    = {Dai, Weichen and Wu, Hexing and Weng, Xiaoyang and Zheng, Yuxin and Ming, Yuhang and Kong, Wanzeng},
  title     = {Multi-Modal Synergistic Implicit Image Enhancement for Efficient Optical Flow Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2173--2182}
}
TAROT: Towards Essentially Domain-Invariant Robustness with Theoretical Justification: Dongyoon Yang,

Jihu Lee,

Yongdai Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Dongyoon and Lee, Jihu and Kim, Yongdai},
  title     = {{TAROT}: Towards Essentially Domain-Invariant Robustness with Theoretical Justification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25780--25789}
}
Foundations of the Theory of Performance-Based Ranking: Sébastien Piérard,

Anaïs Halin,

Anthony Cioppa,

Adrien Deliege,

Marc Van Droogenbroeck; [pdf] [supp]
[bibtex]
@InProceedings{Pierard_2025_CVPR,
  author    = {Pi{\'e}rard, S{\'e}bastien and Halin, Ana{\"\i}s and Cioppa, Anthony and Deliege, Adrien and Van Droogenbroeck, Marc},
  title     = {Foundations of the Theory of Performance-Based Ranking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14293--14302}
}
Unveiling the Mist over 3D Vision-Language Understanding: Object-centric Evaluation with Chain-of-Analysis: Jiangyong Huang,

Baoxiong Jia,

Yan Wang,

Ziyu Zhu,

Xiongkun Linghu,

Qing Li,

Song-Chun Zhu,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Jiangyong and Jia, Baoxiong and Wang, Yan and Zhu, Ziyu and Linghu, Xiongkun and Li, Qing and Zhu, Song-Chun and Huang, Siyuan},
  title     = {Unveiling the Mist over {3D} Vision-Language Understanding: Object-centric Evaluation with Chain-of-Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24570--24581}
}
Generating Multimodal Driving Scenes via Next-Scene Prediction: Yanhao Wu,

Haoyang Zhang,

Tianwei Lin,

Lichao Huang,

Shujie Luo,

Rui Wu,

Congpei Qiu,

Wei Ke,

Tong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Yanhao and Zhang, Haoyang and Lin, Tianwei and Huang, Lichao and Luo, Shujie and Wu, Rui and Qiu, Congpei and Ke, Wei and Zhang, Tong},
  title     = {Generating Multimodal Driving Scenes via Next-Scene Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6844--6853}
}
BIGS: Bimanual Category-agnostic Interaction Reconstruction from Monocular Videos via 3D Gaussian Splatting: Jeongwan On,

Kyeonghwan Gwak,

Gunyoung Kang,

Junuk Cha,

Soohyun Hwang,

Hyein Hwang,

Seungryul Baek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{On_2025_CVPR,
  author    = {On, Jeongwan and Gwak, Kyeonghwan and Kang, Gunyoung and Cha, Junuk and Hwang, Soohyun and Hwang, Hyein and Baek, Seungryul},
  title     = {{BIGS}: Bimanual Category-agnostic Interaction Reconstruction from Monocular Videos via {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17437--17447}
}
APT: Adaptive Personalized Training for Diffusion Models with Limited Data: JungWoo Chae,

Jiyoon Kim,

JaeWoong Choi,

Kyungyul Kim,

Sangheum Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chae_2025_CVPR,
  author    = {Chae, JungWoo and Kim, Jiyoon and Choi, JaeWoong and Kim, Kyungyul and Hwang, Sangheum},
  title     = {{APT}: Adaptive Personalized Training for Diffusion Models with Limited Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28619--28628}
}
Symmetry Strikes Back: From Single-Image Symmetry Detection to 3D Generation: Xiang Li,

Zixuan Huang,

Anh Thai,

James M. Rehg; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xiang and Huang, Zixuan and Thai, Anh and Rehg, James M.},
  title     = {Symmetry Strikes Back: From Single-Image Symmetry Detection to {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {743--752}
}
Frequency-Biased Synergistic Design for Image Compression and Compensation: Jiaming Liu,

Qi Zheng,

Zihao Liu,

Yilian Zhong,

Peiye Liu,

Tao Liu,

Shusong Xu,

Yanheng Lu,

Sicheng Li,

Dimin Niu,

Yibo Fan; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jiaming and Zheng, Qi and Liu, Zihao and Zhong, Yilian and Liu, Peiye and Liu, Tao and Xu, Shusong and Lu, Yanheng and Li, Sicheng and Niu, Dimin and Fan, Yibo},
  title     = {Frequency-Biased Synergistic Design for Image Compression and Compensation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12820--12829}
}
PosterMaker: Towards High-Quality Product Poster Generation with Accurate Text Rendering: Yifan Gao,

Zihang Lin,

Chuanbin Liu,

Min Zhou,

Tiezheng Ge,

Bo Zheng,

Hongtao Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Yifan and Lin, Zihang and Liu, Chuanbin and Zhou, Min and Ge, Tiezheng and Zheng, Bo and Xie, Hongtao},
  title     = {{PosterMaker}: Towards High-Quality Product Poster Generation with Accurate Text Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8083--8093}
}
Sparse Voxels Rasterization: Real-time High-fidelity Radiance Field Rendering: Cheng Sun,

Jaesung Choe,

Charles Loop,

Wei-Chiu Ma,

Yu-Chiang Frank Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Cheng and Choe, Jaesung and Loop, Charles and Ma, Wei-Chiu and Wang, Yu-Chiang Frank},
  title     = {Sparse Voxels Rasterization: Real-time High-fidelity Radiance Field Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16187--16196}
}
Rethinking Personalized Aesthetics Assessment: Employing Physique Aesthetics Assessment as An Exemplification: Haobin Zhong,

Shuai He,

Anlong Ming,

Huadong Ma; [pdf] [supp]
[bibtex]
@InProceedings{Zhong_2025_CVPR,
  author    = {Zhong, Haobin and He, Shuai and Ming, Anlong and Ma, Huadong},
  title     = {Rethinking Personalized Aesthetics Assessment: Employing Physique Aesthetics Assessment as An Exemplification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2935--2944}
}
You See it, You Got it: Learning 3D Creation on Pose-Free Videos at Scale: Baorui Ma,

Huachen Gao,

Haoge Deng,

Zhengxiong Luo,

Tiejun Huang,

Lulu Tang,

Xinlong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Baorui and Gao, Huachen and Deng, Haoge and Luo, Zhengxiong and Huang, Tiejun and Tang, Lulu and Wang, Xinlong},
  title     = {You See it, You Got it: Learning {3D} Creation on Pose-Free Videos at Scale},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2016--2029}
}
MambaIC: State Space Models for High-Performance Learned Image Compression: Fanhu Zeng,

Hao Tang,

Yihua Shao,

Siyu Chen,

Ling Shao,

Yan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_CVPR,
  author    = {Zeng, Fanhu and Tang, Hao and Shao, Yihua and Chen, Siyu and Shao, Ling and Wang, Yan},
  title     = {{MambaIC}: State Space Models for High-Performance Learned Image Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18041--18050}
}
SCAP: Transductive Test-Time Adaptation via Supportive Clique-based Attribute Prompting: Chenyu Zhang,

Kunlun Xu,

Zichen Liu,

Yuxin Peng,

Jiahuan Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Chenyu and Xu, Kunlun and Liu, Zichen and Peng, Yuxin and Zhou, Jiahuan},
  title     = {{SCAP}: Transductive Test-Time Adaptation via Supportive Clique-based Attribute Prompting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30032--30041}
}
Instant Gaussian Stream: Fast and Generalizable Streaming of Dynamic Scene Reconstruction via Gaussian Splatting: Jinbo Yan,

Rui Peng,

Zhiyan Wang,

Luyang Tang,

Jiayu Yang,

Jie Liang,

Jiahao Wu,

Ronggang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Jinbo and Peng, Rui and Wang, Zhiyan and Tang, Luyang and Yang, Jiayu and Liang, Jie and Wu, Jiahao and Wang, Ronggang},
  title     = {Instant {Gaussian} Stream: Fast and Generalizable Streaming of Dynamic Scene Reconstruction via {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16520--16531}
}
Locality-Aware Zero-Shot Human-Object Interaction Detection: Sanghyun Kim,

Deunsol Jung,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Sanghyun and Jung, Deunsol and Cho, Minsu},
  title     = {Locality-Aware Zero-Shot Human-Object Interaction Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20190--20200}
}
PEACE: Empowering Geologic Map Holistic Understanding with MLLMs: Yangyu Huang,

Tianyi Gao,

Haoran Xu,

Qihao Zhao,

Yang Song,

Zhipeng Gui,

Tengchao Lv,

Hao Chen,

Lei Cui,

Scarlett Li,

Furu Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Yangyu and Gao, Tianyi and Xu, Haoran and Zhao, Qihao and Song, Yang and Gui, Zhipeng and Lv, Tengchao and Chen, Hao and Cui, Lei and Li, Scarlett and Wei, Furu},
  title     = {{PEACE}: Empowering Geologic Map Holistic Understanding with {MLLMs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3899--3908}
}
Tracktention: Leveraging Point Tracking to Attend Videos Faster and Better: Zihang Lai,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Zihang and Vedaldi, Andrea},
  title     = {Tracktention: Leveraging Point Tracking to Attend Videos Faster and Better},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22809--22819}
}
ConceptGuard: Continual Personalized Text-to-Image Generation with Forgetting and Confusion Mitigation: Zirun Guo,

Tao Jin; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Zirun and Jin, Tao},
  title     = {{ConceptGuard}: Continual Personalized Text-to-Image Generation with Forgetting and Confusion Mitigation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2945--2954}
}
Two by Two: Learning Multi-Task Pairwise Objects Assembly for Generalizable Robot Manipulation: Yu Qi,

Yuanchen Ju,

Tianming Wei,

Chi Chu,

Lawson L.S. Wong,

Huazhe Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qi_2025_CVPR,
  author    = {Qi, Yu and Ju, Yuanchen and Wei, Tianming and Chu, Chi and Wong, Lawson L. S. and Xu, Huazhe},
  title     = {Two by Two: Learning Multi-Task Pairwise Objects Assembly for Generalizable Robot Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17383--17393}
}
SGFormer: Satellite-Ground Fusion for 3D Semantic Scene Completion: Xiyue Guo,

Jiarui Hu,

Junjie Hu,

Hujun Bao,

Guofeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Xiyue and Hu, Jiarui and Hu, Junjie and Bao, Hujun and Zhang, Guofeng},
  title     = {{SGFormer}: Satellite-Ground Fusion for {3D} Semantic Scene Completion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11929--11938}
}
MARVEL-40M+: Multi-Level Visual Elaboration for High-Fidelity Text-to-3D Content Creation: Sankalp Sinha,

Mohammad Sadil Khan,

Muhammad Usama,

Shino Sam,

Didier Stricker,

Sk Aziz Ali,

Muhammad Zeshan Afzal; [pdf] [supp]
[bibtex]
@InProceedings{Sinha_2025_CVPR,
  author    = {Sinha, Sankalp and Khan, Mohammad Sadil and Usama, Muhammad and Sam, Shino and Stricker, Didier and Ali, Sk Aziz and Afzal, Muhammad Zeshan},
  title     = {{MARVEL-40M+}: Multi-Level Visual Elaboration for High-Fidelity Text-to-{3D} Content Creation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8105--8116}
}
Random Conditioning for Diffusion Model Compression with Distillation: Dohyun Kim,

Sehwan Park,

Geonhee Han,

Seung Wook Kim,

Paul Hongsuck Seo; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Dohyun and Park, Sehwan and Han, Geonhee and Kim, Seung Wook and Seo, Paul Hongsuck},
  title     = {Random Conditioning for Diffusion Model Compression with Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18607--18618}
}
Hierarchical Gaussian Mixture Model Splatting for Efficient and Part Controllable 3D Generation: Qitong Yang,

Mingtao Feng,

Zijie Wu,

Weisheng Dong,

Fangfang Wu,

Yaonan Wang,

Ajmal Mian; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Qitong and Feng, Mingtao and Wu, Zijie and Dong, Weisheng and Wu, Fangfang and Wang, Yaonan and Mian, Ajmal},
  title     = {Hierarchical {Gaussian} Mixture Model Splatting for Efficient and Part Controllable {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11104--11114}
}
ERUPT: Efficient Rendering with Unposed Patch Transformer: Maxim V. Shugaev,

Vincent Chen,

Maxim Karrenbach,

Kyle Ashley,

Bridget Kennedy,

Naresh P. Cuntoor; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shugaev_2025_CVPR,
  author    = {Shugaev, Maxim V. and Chen, Vincent and Karrenbach, Maxim and Ashley, Kyle and Kennedy, Bridget and Cuntoor, Naresh P.},
  title     = {{ERUPT}: Efficient Rendering with Unposed Patch Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6057--6067}
}
Rethinking End-to-End 2D to 3D Scene Segmentation in Gaussian Splatting: Runsong Zhu,

Shi Qiu,

Zhengzhe Liu,

Ka-Hei Hui,

Qianyi Wu,

Pheng-Ann Heng,

Chi-Wing Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Runsong and Qiu, Shi and Liu, Zhengzhe and Hui, Ka-Hei and Wu, Qianyi and Heng, Pheng-Ann and Fu, Chi-Wing},
  title     = {Rethinking End-to-End {2D} to {3D} Scene Segmentation in {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3656--3665}
}
Quad-Pixel Image Defocus Deblurring: A New Benchmark and Model: Hang Chen,

Yin Xie,

Xiaoxiu Peng,

Lihu Sun,

Wenkai Su,

Xiaodong Yang,

Chengming Liu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Hang and Xie, Yin and Peng, Xiaoxiu and Sun, Lihu and Su, Wenkai and Yang, Xiaodong and Liu, Chengming},
  title     = {Quad-Pixel Image Defocus Deblurring: A New Benchmark and Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5709--5719}
}
DocVLM: Make Your VLM an Efficient Reader: Mor Shpigel Nacson,

Aviad Aberdam,

Roy Ganz,

Elad Ben Avraham,

Alona Golts,

Yair Kittenplon,

Shai Mazor,

Ron Litman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nacson_2025_CVPR,
  author    = {Nacson, Mor Shpigel and Aberdam, Aviad and Ganz, Roy and Ben Avraham, Elad and Golts, Alona and Kittenplon, Yair and Mazor, Shai and Litman, Ron},
  title     = {{DocVLM}: Make Your {VLM} an Efficient Reader},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29005--29015}
}
Revisiting Source-Free Domain Adaptation: Insights into Representativeness, Generalization, and Variety: Ronghang Zhu,

Mengxuan Hu,

Weiming Zhuang,

Lingjuan Lyu,

Xiang Yu,

Sheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Ronghang and Hu, Mengxuan and Zhuang, Weiming and Lyu, Lingjuan and Yu, Xiang and Li, Sheng},
  title     = {Revisiting Source-Free Domain Adaptation: Insights into Representativeness, Generalization, and Variety},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25688--25697}
}
Adaptive Unimodal Regulation for Balanced Multimodal Information Acquisition: Chengxiang Huang,

Yake Wei,

Zequn Yang,

Di Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Chengxiang and Wei, Yake and Yang, Zequn and Hu, Di},
  title     = {Adaptive Unimodal Regulation for Balanced Multimodal Information Acquisition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25854--25863},
}
Heterogeneous Skeleton-Based Action Representation Learning: Hongsong Wang,

Xiaoyan Ma,

Jidong Kuang,

Jie Gui; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Hongsong and Ma, Xiaoyan and Kuang, Jidong and Gui, Jie},
  title     = {Heterogeneous Skeleton-Based Action Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19154--19164},
}
FLARE: Feed-forward Geometry, Appearance and Camera Estimation from Uncalibrated Sparse Views: Shangzhan Zhang,

Jianyuan Wang,

Yinghao Xu,

Nan Xue,

Christian Rupprecht,

Xiaowei Zhou,

Yujun Shen,

Gordon Wetzstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Shangzhan and Wang, Jianyuan and Xu, Yinghao and Xue, Nan and Rupprecht, Christian and Zhou, Xiaowei and Shen, Yujun and Wetzstein, Gordon},
  title     = {{FLARE}: Feed-forward Geometry, Appearance and Camera Estimation from Uncalibrated Sparse Views},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21936--21947},
}
Improving Gaussian Splatting with Localized Points Management: Haosen Yang,

Chenhao Zhang,

Wenqing Wang,

Marco Volino,

Adrian Hilton,

Li Zhang,

Xiatian Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Haosen and Zhang, Chenhao and Wang, Wenqing and Volino, Marco and Hilton, Adrian and Zhang, Li and Zhu, Xiatian},
  title     = {Improving {Gaussian} Splatting with Localized Points Management},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21696--21705},
}
GEAL: Generalizable 3D Affordance Learning with Cross-Modal Consistency: Dongyue Lu,

Lingdong Kong,

Tianxin Huang,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Dongyue and Kong, Lingdong and Huang, Tianxin and Lee, Gim Hee},
  title     = {{GEAL}: Generalizable {3D} Affordance Learning with Cross-Modal Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1680--1690},
}
Dynamic Derivation and Elimination: Audio Visual Segmentation with Enhanced Audio Semantics: Chen Liu,

Liying Yang,

Peike Li,

Dadong Wang,

Lincheng Li,

Xin Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Chen and Yang, Liying and Li, Peike and Wang, Dadong and Li, Lincheng and Yu, Xin},
  title     = {Dynamic Derivation and Elimination: Audio Visual Segmentation with Enhanced Audio Semantics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3131--3141},
}
AnyMap: Learning a General Camera Model for Structure-from-Motion with Unknown Distortion in Dynamic Scenes: Andrea Porfiri Dal Cin,

Georgi Dikov,

Jihong Ju,

Mohsen Ghafoorian; [pdf] [supp]
[bibtex]
@InProceedings{Cin_2025_CVPR,
  author    = {Porfiri Dal Cin, Andrea and Dikov, Georgi and Ju, Jihong and Ghafoorian, Mohsen},
  title     = {{AnyMap}: Learning a General Camera Model for Structure-from-Motion with Unknown Distortion in Dynamic Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16674--16684},
}
ESC: Erasing Space Concept for Knowledge Deletion: Tae-Young Lee,

Sundong Park,

Minwoo Jeon,

Hyoseok Hwang,

Gyeong-Moon Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Tae-Young and Park, Sundong and Jeon, Minwoo and Hwang, Hyoseok and Park, Gyeong-Moon},
  title     = {{ESC}: Erasing Space Concept for Knowledge Deletion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5010--5019},
}
Language Guided Concept Bottleneck Models for Interpretable Continual Learning: Lu Yu,

Haoyu Han,

Zhe Tao,

Hantao Yao,

Changsheng Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Lu and Han, Haoyu and Tao, Zhe and Yao, Hantao and Xu, Changsheng},
  title     = {Language Guided Concept Bottleneck Models for Interpretable Continual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14976--14986},
}
One-Way Ticket: Time-Independent Unified Encoder for Distilling Text-to-Image Diffusion Models: Senmao Li,

Lei Wang,

Kai Wang,

Tao Liu,

Jiehang Xie,

Joost van de Weijer,

Fahad Shahbaz Khan,

Shiqi Yang,

Yaxing Wang,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Senmao and Wang, Lei and Wang, Kai and Liu, Tao and Xie, Jiehang and van de Weijer, Joost and Khan, Fahad Shahbaz and Yang, Shiqi and Wang, Yaxing and Yang, Jian},
  title     = {One-Way Ticket: Time-Independent Unified Encoder for Distilling Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23563--23574},
}
Domain Adaptive Diabetic Retinopathy Grading with Model Absence and Flowing Data: Wenxin Su,

Song Tang,

Xiaofeng Liu,

Xiaojing Yi,

Mao Ye,

Chunxiao Zu,

Jiahao Li,

Xiatian Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2025_CVPR,
  author    = {Su, Wenxin and Tang, Song and Liu, Xiaofeng and Yi, Xiaojing and Ye, Mao and Zu, Chunxiao and Li, Jiahao and Zhu, Xiatian},
  title     = {Domain Adaptive Diabetic Retinopathy Grading with Model Absence and Flowing Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28337--28346},
}
Temporal Separation with Entropy Regularization for Knowledge Distillation in Spiking Neural Networks: Kairong Yu,

Chengting Yu,

Tianqing Zhang,

Xiaochen Zhao,

Shu Yang,

Hongwei Wang,

Qiang Zhang,

Qi Xu; [pdf] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Kairong and Yu, Chengting and Zhang, Tianqing and Zhao, Xiaochen and Yang, Shu and Wang, Hongwei and Zhang, Qiang and Xu, Qi},
  title     = {Temporal Separation with Entropy Regularization for Knowledge Distillation in Spiking Neural Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8806--8816},
}
LoRASculpt: Sculpting LoRA for Harmonizing General and Specialized Knowledge in Multimodal Large Language Models: Jian Liang,

Wenke Huang,

Guancheng Wan,

Qu Yang,

Mang Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Jian and Huang, Wenke and Wan, Guancheng and Yang, Qu and Ye, Mang},
  title     = {{LoRASculpt}: Sculpting {LoRA} for Harmonizing General and Specialized Knowledge in Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26170--26180},
}
SEAL: Semantic Attention Learning for Long Video Representation: Lan Wang,

Yujia Chen,

Du Tran,

Vishnu Naresh Boddeti,

Wen-Sheng Chu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Lan and Chen, Yujia and Tran, Du and Boddeti, Vishnu Naresh and Chu, Wen-Sheng},
  title     = {{SEAL}: Semantic Attention Learning for Long Video Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26192--26201},
}
Re-HOLD: Video Hand Object Interaction Reenactment via adaptive Layout-instructed Diffusion Model: Yingying Fan,

Quanwei Yang,

Kaisiyuan Wang,

Hang Zhou,

Yingying Li,

Haocheng Feng,

Errui Ding,

Yu Wu,

Jingdong Wang; [pdf]
[bibtex]
@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Yingying and Yang, Quanwei and Wang, Kaisiyuan and Zhou, Hang and Li, Yingying and Feng, Haocheng and Ding, Errui and Wu, Yu and Wang, Jingdong},
  title     = {{Re-HOLD}: Video Hand Object Interaction Reenactment via adaptive Layout-instructed Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17550--17560},
}
Theoretical Insights in Model Inversion Robustness and Conditional Entropy Maximization for Collaborative Inference Systems: Song Xia,

Yi Yu,

Wenhan Yang,

Meiwen Ding,

Zhuo Chen,

Ling-Yu Duan,

Alex C. Kot,

Xudong Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_CVPR,
  author    = {Xia, Song and Yu, Yi and Yang, Wenhan and Ding, Meiwen and Chen, Zhuo and Duan, Ling-Yu and Kot, Alex C. and Jiang, Xudong},
  title     = {Theoretical Insights in Model Inversion Robustness and Conditional Entropy Maximization for Collaborative Inference Systems},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8753--8763},
}
Odd-One-Out: Anomaly Detection by Comparing with Neighbors: Ankan Bhunia,

Changjian Li,

Hakan Bilen; [pdf] [supp]
[bibtex]
@InProceedings{Bhunia_2025_CVPR,
  author    = {Bhunia, Ankan and Li, Changjian and Bilen, Hakan},
  title     = {Odd-One-Out: Anomaly Detection by Comparing with Neighbors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20395--20404},
}
SCFlow2: Plug-and-Play Object Pose Refiner with Shape-Constraint Scene Flow: Qingyuan Wang,

Rui Song,

Jiaojiao Li,

Kerui Cheng,

David Ferstl,

Yinlin Hu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Qingyuan and Song, Rui and Li, Jiaojiao and Cheng, Kerui and Ferstl, David and Hu, Yinlin},
  title     = {{SCFlow2}: Plug-and-Play Object Pose Refiner with Shape-Constraint Scene Flow},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22045--22054},
}
D^3CTTA: Domain-Dependent Decorrelation for Continual Test-Time Adaption of 3D LiDAR Segmentation: Jichun Zhao,

Haiyong Jiang,

Haoxuan Song,

Jun Xiao,

Dong Gong; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Jichun and Jiang, Haiyong and Song, Haoxuan and Xiao, Jun and Gong, Dong},
  title     = {{D$^3$CTTA}: Domain-Dependent Decorrelation for Continual Test-Time Adaption of {3D} {LiDAR} Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11864--11874},
}
Flowing from Words to Pixels: A Noise-Free Framework for Cross-Modality Evolution: Qihao Liu,

Xi Yin,

Alan Yuille,

Andrew Brown,

Mannat Singh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Qihao and Yin, Xi and Yuille, Alan and Brown, Andrew and Singh, Mannat},
  title     = {Flowing from Words to Pixels: A Noise-Free Framework for Cross-Modality Evolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2755--2765},
}
FlipSketch: Flipping Static Drawings to Text-Guided Sketch Animations: Hmrishav Bandyopadhyay,

Yi-Zhe Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bandyopadhyay_2025_CVPR,
  author    = {Bandyopadhyay, Hmrishav and Song, Yi-Zhe},
  title     = {{FlipSketch}: Flipping Static Drawings to Text-Guided Sketch Animations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28394--28404},
}
Interpretable Generative Models through Post-hoc Concept Bottlenecks: Akshay Kulkarni,

Ge Yan,

Chung-En Sun,

Tuomas Oikarinen,

Tsui-Wei Weng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kulkarni_2025_CVPR,
  author    = {Kulkarni, Akshay and Yan, Ge and Sun, Chung-En and Oikarinen, Tuomas and Weng, Tsui-Wei},
  title     = {Interpretable Generative Models through Post-hoc Concept Bottlenecks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8162--8171},
}
SketchAgent: Language-Driven Sequential Sketch Generation: Yael Vinker,

Tamar Rott Shaham,

Kristine Zheng,

Alex Zhao,

Judith E. Fan,

Antonio Torralba; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vinker_2025_CVPR,
  author    = {Vinker, Yael and Shaham, Tamar Rott and Zheng, Kristine and Zhao, Alex and Fan, Judith E. and Torralba, Antonio},
  title     = {{SketchAgent}: Language-Driven Sequential Sketch Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23355--23368},
}
DRAWER: Digital Reconstruction and Articulation With Environment Realism: Hongchi Xia,

Entong Su,

Marius Memmel,

Arhan Jain,

Raymond Yu,

Numfor Mbiziwo-Tiapo,

Ali Farhadi,

Abhishek Gupta,

Shenlong Wang,

Wei-Chiu Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_CVPR,
  author    = {Xia, Hongchi and Su, Entong and Memmel, Marius and Jain, Arhan and Yu, Raymond and Mbiziwo-Tiapo, Numfor and Farhadi, Ali and Gupta, Abhishek and Wang, Shenlong and Ma, Wei-Chiu},
  title     = {{DRAWER}: Digital Reconstruction and Articulation With Environment Realism},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21771--21782},
}
GoLF-NRT: Integrating Global Context and Local Geometry for Few-Shot View Synthesis: You Wang,

Li Fang,

Hao Zhu,

Fei Hu,

Long Ye,

Zhan Ma; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, You and Fang, Li and Zhu, Hao and Hu, Fei and Ye, Long and Ma, Zhan},
  title     = {{GoLF-NRT}: Integrating Global Context and Local Geometry for Few-Shot View Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21349--21359},
}
Deep Change Monitoring: A Hyperbolic Representative Learning Framework and a Dataset for Long-term Fine-grained Tree Change Detection: Yante Li,

Hanwen Qi,

Haoyu Chen,

Xinlian Liang,

Guoying Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yante and Qi, Hanwen and Chen, Haoyu and Liang, Xinlian and Zhao, Guoying},
  title     = {Deep Change Monitoring: A Hyperbolic Representative Learning Framework and a Dataset for Long-term Fine-grained Tree Change Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27346--27356},
}
A Closer Look at Time Steps is Worthy of Triple Speed-Up for Diffusion Model Training: Kai Wang,

Mingjia Shi,

Yukun Zhou,

Zekai Li,

Zhihang Yuan,

Yuzhang Shang,

Xiaojiang Peng,

Hanwang Zhang,

Yang You; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Kai and Shi, Mingjia and Zhou, Yukun and Li, Zekai and Yuan, Zhihang and Shang, Yuzhang and Peng, Xiaojiang and Zhang, Hanwang and You, Yang},
  title     = {A Closer Look at Time Steps is Worthy of Triple Speed-Up for Diffusion Model Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12934--12944},
}
Empowering LLMs to Understand and Generate Complex Vector Graphics: Ximing Xing,

Juncheng Hu,

Guotao Liang,

Jing Zhang,

Dong Xu,

Qian Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Ximing and Hu, Juncheng and Liang, Guotao and Zhang, Jing and Xu, Dong and Yu, Qian},
  title     = {Empowering {LLMs} to Understand and Generate Complex Vector Graphics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19487--19497},
}
PanoGS: Gaussian-based Panoptic Segmentation for 3D Open Vocabulary Scene Understanding: Hongjia Zhai,

Hai Li,

Zhenzhe Li,

Xiaokun Pan,

Yijia He,

Guofeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhai_2025_CVPR,
  author    = {Zhai, Hongjia and Li, Hai and Li, Zhenzhe and Pan, Xiaokun and He, Yijia and Zhang, Guofeng},
  title     = {{PanoGS}: {Gaussian}-based Panoptic Segmentation for {3D} Open Vocabulary Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14114--14124},
}
Watermarking One for All: A Robust Watermarking Scheme Against Partial Image Theft: Gaozhi Liu,

Silu Cao,

Zhenxing Qian,

Xinpeng Zhang,

Sheng Li,

Wanli Peng; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Gaozhi and Cao, Silu and Qian, Zhenxing and Zhang, Xinpeng and Li, Sheng and Peng, Wanli},
  title     = {Watermarking One for All: A Robust Watermarking Scheme Against Partial Image Theft},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8225--8234},
}
ITA-MDT: Image-Timestep-Adaptive Masked Diffusion Transformer Framework for Image-Based Virtual Try-On: Ji Woo Hong,

Tri Ton,

Trung X. Pham,

Gwanhyeong Koo,

Sunjae Yoon,

Chang D. Yoo; [pdf] [supp]
[bibtex]
@InProceedings{Hong_2025_CVPR,
  author    = {Hong, Ji Woo and Ton, Tri and Pham, Trung X. and Koo, Gwanhyeong and Yoon, Sunjae and Yoo, Chang D.},
  title     = {{ITA-MDT}: Image-Timestep-Adaptive Masked Diffusion Transformer Framework for Image-Based Virtual Try-On},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28284--28294},
}
MultiVENT 2.0: A Massive Multilingual Benchmark for Event-Centric Video Retrieval: Reno Kriz,

Kate Sanders,

David Etter,

Kenton Murray,

Cameron Carpenter,

Hannah Recknor,

Jimena Guallar-Blasco,

Alexander Martin,

Eugene Yang,

Benjamin Van Durme; [pdf] [supp]
[bibtex]
@InProceedings{Kriz_2025_CVPR,
  author    = {Kriz, Reno and Sanders, Kate and Etter, David and Murray, Kenton and Carpenter, Cameron and Recknor, Hannah and Guallar-Blasco, Jimena and Martin, Alexander and Yang, Eugene and Van Durme, Benjamin},
  title     = {{MultiVENT} 2.0: A Massive Multilingual Benchmark for Event-Centric Video Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24149--24158},
}
VolFormer: Explore More Comprehensive Cube Interaction for Hyperspectral Image Restoration and Beyond: Dabing Yu,

Zheng Gao; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Dabing and Gao, Zheng},
  title     = {{VolFormer}: Explore More Comprehensive Cube Interaction for Hyperspectral Image Restoration and Beyond},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28091--28101},
}
Minding Fuzzy Regions: A Data-driven Alternating Learning Paradigm for Stable Lesion Segmentation: Lexin Fang,

Yunyang Xu,

Xiang Ma,

Xuemei Li,

Caiming Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Lexin and Xu, Yunyang and Ma, Xiang and Li, Xuemei and Zhang, Caiming},
  title     = {Minding Fuzzy Regions: A Data-driven Alternating Learning Paradigm for Stable Lesion Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10425--10434},
}
BizGen: Advancing Article-level Visual Text Rendering for Infographics Generation: Yuyang Peng,

Shishi Xiao,

Keming Wu,

Qisheng Liao,

Bohan Chen,

Kevin Lin,

Danqing Huang,

Ji Li,

Yuhui Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_CVPR,
  author    = {Peng, Yuyang and Xiao, Shishi and Wu, Keming and Liao, Qisheng and Chen, Bohan and Lin, Kevin and Huang, Danqing and Li, Ji and Yuan, Yuhui},
  title     = {{BizGen}: Advancing Article-level Visual Text Rendering for Infographics Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23615--23624},
}
MLVU: Benchmarking Multi-task Long Video Understanding: Junjie Zhou,

Yan Shu,

Bo Zhao,

Boya Wu,

Zhengyang Liang,

Shitao Xiao,

Minghao Qin,

Xi Yang,

Yongping Xiong,

Bo Zhang,

Tiejun Huang,

Zheng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Junjie and Shu, Yan and Zhao, Bo and Wu, Boya and Liang, Zhengyang and Xiao, Shitao and Qin, Minghao and Yang, Xi and Xiong, Yongping and Zhang, Bo and Huang, Tiejun and Liu, Zheng},
  title     = {{MLVU}: Benchmarking Multi-task Long Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13691--13701},
}
Recovering Dynamic 3D Sketches from Videos: Jaeah Lee,

Changwoon Choi,

Young Min Kim,

Jaesik Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Jaeah and Choi, Changwoon and Kim, Young Min and Park, Jaesik},
  title     = {Recovering Dynamic {3D} Sketches from Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12423--12432},
}
IM-Zero: Instance-level Motion Controllable Video Generation in a Zero-shot Manner: Yuyang Huang,

Yabo Chen,

Li Ding,

Xiaopeng Zhang,

Wenrui Dai,

Junni Zou,

Hongkai Xiong,

Qi Tian; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Yuyang and Chen, Yabo and Ding, Li and Zhang, Xiaopeng and Dai, Wenrui and Zou, Junni and Xiong, Hongkai and Tian, Qi},
  title     = {{IM-Zero}: Instance-level Motion Controllable Video Generation in a Zero-shot Manner},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7265--7275},
}
EigenGS Representation: From Eigenspace to Gaussian Image Space: Lo-Wei Tai,

Ching-En Li,

Cheng-Lin Chen,

Chih-Jung Tsai,

Hwann-Tzong Chen,

Tyng-Luh Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tai_2025_CVPR,
  author    = {Tai, Lo-Wei and Li, Ching-En and Chen, Cheng-Lin and Tsai, Chih-Jung and Chen, Hwann-Tzong and Liu, Tyng-Luh},
  title     = {{EigenGS} Representation: From Eigenspace to {Gaussian} Image Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13487--13496},
}
Link-based Contrastive Learning for One-Shot Unsupervised Domain Adaptation: Yue Zhang,

Mingyue Bin,

Yuyang Zhang,

Zhongyuan Wang,

Zhen Han,

Chao Liang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yue and Bin, Mingyue and Zhang, Yuyang and Wang, Zhongyuan and Han, Zhen and Liang, Chao},
  title     = {Link-based Contrastive Learning for One-Shot Unsupervised Domain Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4916--4926},
}
SmartCLIP: Modular Vision-language Alignment with Identification Guarantees: Shaoan Xie,

Lingjing Kong,

Yujia Zheng,

Yu Yao,

Zeyu Tang,

Eric P. Xing,

Guangyi Chen,

Kun Zhang; [pdf]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Shaoan and Kong, Lingjing and Zheng, Yujia and Yao, Yu and Tang, Zeyu and Xing, Eric P. and Chen, Guangyi and Zhang, Kun},
  title     = {{SmartCLIP}: Modular Vision-language Alignment with Identification Guarantees},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29780--29790},
}
UniMamba: Unified Spatial-Channel Representation Learning with Group-Efficient Mamba for LiDAR-based 3D Object Detection: Xin Jin,

Haisheng Su,

Kai Liu,

Cong Ma,

Wei Wu,

Fei Hui,

Junchi Yan; [pdf] [arXiv]
[bibtex]
@InProceedings{Jin_2025_CVPR,
  author    = {Jin, Xin and Su, Haisheng and Liu, Kai and Ma, Cong and Wu, Wei and Hui, Fei and Yan, Junchi},
  title     = {{UniMamba}: Unified Spatial-Channel Representation Learning with Group-Efficient {Mamba} for {LiDAR}-based {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1407--1417},
}
MaSS13K: A Matting-level Semantic Segmentation Benchmark: Chenxi Xie,

Minghan Li,

Hui Zeng,

Jun Luo,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Chenxi and Li, Minghan and Zeng, Hui and Luo, Jun and Zhang, Lei},
  title     = {{MaSS13K}: A Matting-level Semantic Segmentation Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14046--14056},
}
Rethinking the Adversarial Robustness of Multi-Exit Neural Networks in an Attack-Defense Game: Keyizhi Xu,

Chi Zhang,

Zhan Chen,

Zhongyuan Wang,

Chunxia Xiao,

Chao Liang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Keyizhi and Zhang, Chi and Chen, Zhan and Wang, Zhongyuan and Xiao, Chunxia and Liang, Chao},
  title     = {Rethinking the Adversarial Robustness of Multi-Exit Neural Networks in an Attack-Defense Game},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10265--10274},
}
Enhancing Testing-Time Robustness for Trusted Multi-View Classification in the Wild: Wei Liu,

Yufei Chen,

Xiaodong Yue; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Wei and Chen, Yufei and Yue, Xiaodong},
  title     = {Enhancing Testing-Time Robustness for Trusted Multi-View Classification in the Wild},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15508--15517},
}
Q-DiT: Accurate Post-Training Quantization for Diffusion Transformers: Lei Chen,

Yuan Meng,

Chen Tang,

Xinzhu Ma,

Jingyan Jiang,

Xin Wang,

Zhi Wang,

Wenwu Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Lei and Meng, Yuan and Tang, Chen and Ma, Xinzhu and Jiang, Jingyan and Wang, Xin and Wang, Zhi and Zhu, Wenwu},
  title     = {{Q-DiT}: Accurate Post-Training Quantization for Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28306--28315},
}
ROD-MLLM: Towards More Reliable Object Detection in Multimodal Large Language Models: Heng Yin,

Yuqiang Ren,

Ke Yan,

Shouhong Ding,

Yongtao Hao; [pdf] [supp]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Heng and Ren, Yuqiang and Yan, Ke and Ding, Shouhong and Hao, Yongtao},
  title     = {{ROD-MLLM}: Towards More Reliable Object Detection in Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14358--14368},
}
RoboGround: Robotic Manipulation with Grounded Vision-Language Priors: Haifeng Huang,

Xinyi Chen,

Yilun Chen,

Hao Li,

Xiaoshen Han,

Zehan Wang,

Tai Wang,

Jiangmiao Pang,

Zhou Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Haifeng and Chen, Xinyi and Chen, Yilun and Li, Hao and Han, Xiaoshen and Wang, Zehan and Wang, Tai and Pang, Jiangmiao and Zhao, Zhou},
  title     = {{RoboGround}: Robotic Manipulation with Grounded Vision-Language Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22540--22550},
}
VideoGuide: Improving Video Diffusion Models without Training Through a Teacher's Guide: Dohun Lee,

Bryan Sangwoo Kim,

Geon Yeong Park,

Jong Chul Ye; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Dohun and Kim, Bryan Sangwoo and Park, Geon Yeong and Ye, Jong Chul},
  title     = {{VideoGuide}: Improving Video Diffusion Models without Training Through a Teacher's Guide},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2599--2608},
}
Improving Transferable Targeted Attacks with Feature Tuning Mixup: Kaisheng Liang,

Xuelong Dai,

Yanjie Li,

Dong Wang,

Bin Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Kaisheng and Dai, Xuelong and Li, Yanjie and Wang, Dong and Xiao, Bin},
  title     = {Improving Transferable Targeted Attacks with Feature Tuning Mixup},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25802--25811},
}
OmniStereo: Real-time Omnidireactional Depth Estimation with Multiview Fisheye Cameras: Jiaxi Deng,

Yushen Wang,

Haitao Meng,

Zuoxun Hou,

Yi Chang,

Gang Chen; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Jiaxi and Wang, Yushen and Meng, Haitao and Hou, Zuoxun and Chang, Yi and Chen, Gang},
  title     = {{OmniStereo}: Real-time Omnidireactional Depth Estimation with Multiview Fisheye Cameras},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1003--1012},
}
DroneSplat: 3D Gaussian Splatting for Robust 3D Reconstruction from In-the-Wild Drone Imagery: Jiadong Tang,

Yu Gao,

Dianyi Yang,

Liqi Yan,

Yufeng Yue,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Jiadong and Gao, Yu and Yang, Dianyi and Yan, Liqi and Yue, Yufeng and Yang, Yi},
  title     = {{DroneSplat}: {3D} {Gaussian} Splatting for Robust {3D} Reconstruction from In-the-Wild Drone Imagery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {833--843},
}
SDGOCC: Semantic and Depth-Guided Bird's-Eye View Transformation for 3D Multimodal Occupancy Prediction: ZaiPeng Duan,

ChenXu Dang,

Xuzhong Hu,

Pei An,

Junfeng Ding,

Jie Zhan,

YunBiao Xu,

Jie Ma; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2025_CVPR,
  author    = {Duan, ZaiPeng and Dang, ChenXu and Hu, Xuzhong and An, Pei and Ding, Junfeng and Zhan, Jie and Xu, YunBiao and Ma, Jie},
  title     = {{SDGOCC}: Semantic and Depth-Guided Bird's-Eye View Transformation for {3D} Multimodal Occupancy Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6751--6760},
}
DrivingSphere: Building a High-fidelity 4D World for Closed-loop Simulation: Tianyi Yan,

Dongming Wu,

Wencheng Han,

Junpeng Jiang,

Xia Zhou,

Kun Zhan,

Cheng-zhong Xu,

Jianbing Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Tianyi and Wu, Dongming and Han, Wencheng and Jiang, Junpeng and Zhou, Xia and Zhan, Kun and Xu, Cheng-zhong and Shen, Jianbing},
  title     = {{DrivingSphere}: Building a High-fidelity {4D} World for Closed-loop Simulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27531--27541},
}
nnWNet: Rethinking the Use of Transformers in Biomedical Image Segmentation and Calling for a Unified Evaluation Benchmark: Yanfeng Zhou,

Lingrui Li,

Le Lu,

Minfeng Xu; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Yanfeng and Li, Lingrui and Lu, Le and Xu, Minfeng},
  title     = {{nnWNet}: Rethinking the Use of Transformers in Biomedical Image Segmentation and Calling for a Unified Evaluation Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20852--20862},
}
Efficient Video Face Enhancement with Enhanced Spatial-Temporal Consistency: Yutong Wang,

Jiajie Teng,

Jiajiong Cao,

Yuming Li,

Chenguang Ma,

Hongteng Xu,

Dixin Luo; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yutong and Teng, Jiajie and Cao, Jiajiong and Li, Yuming and Ma, Chenguang and Xu, Hongteng and Luo, Dixin},
  title     = {Efficient Video Face Enhancement with Enhanced Spatial-Temporal Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2183--2193},
}
VELOCITI: Benchmarking Video-Language Compositional Reasoning with Strict Entailment: Darshana Saravanan,

Varun Gupta,

Darshan Singh,

Zeeshan Khan,

Vineet Gandhi,

Makarand Tapaswi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saravanan_2025_CVPR,
  author    = {Saravanan, Darshana and Gupta, Varun and Singh, Darshan and Khan, Zeeshan and Gandhi, Vineet and Tapaswi, Makarand},
  title     = {{VELOCITI}: Benchmarking Video-Language Compositional Reasoning with Strict Entailment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18914--18924},
}
Seeing is Not Believing: Adversarial Natural Object Optimization for Hard-Label 3D Scene Attacks: Daizong Liu,

Wei Hu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Daizong and Hu, Wei},
  title     = {Seeing is Not Believing: Adversarial Natural Object Optimization for Hard-Label {3D} Scene Attacks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11886--11897},
}
IDProtector: An Adversarial Noise Encoder to Protect Against ID-Preserving Image Generation: Yiren Song,

Pei Yang,

Hai Ci,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_CVPR,
  author    = {Song, Yiren and Yang, Pei and Ci, Hai and Shou, Mike Zheng},
  title     = {{IDProtector}: An Adversarial Noise Encoder to Protect Against {ID-Preserving} Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3019--3028},
}
HuPerFlow: A Comprehensive Benchmark for Human vs. Machine Motion Estimation Comparison: Yung-Hao Yang,

Zitang Sun,

Taiki Fukiage,

Shin'ya Nishida; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Yung-Hao and Sun, Zitang and Fukiage, Taiki and Nishida, Shin'ya},
  title     = {{HuPerFlow}: A Comprehensive Benchmark for Human vs. Machine Motion Estimation Comparison},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22799--22808},
}
LoTUS: Large-Scale Machine Unlearning with a Taste of Uncertainty: Christoforos N. Spartalis,

Theodoros Semertzidis,

Efstratios Gavves,

Petros Daras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Spartalis_2025_CVPR,
  author    = {Spartalis, Christoforos N. and Semertzidis, Theodoros and Gavves, Efstratios and Daras, Petros},
  title     = {{LoTUS}: Large-Scale Machine Unlearning with a Taste of Uncertainty},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10046--10055},
}
SleeperMark: Towards Robust Watermark against Fine-Tuning Text-to-image Diffusion Models: Zilan Wang,

Junfeng Guo,

Jiacheng Zhu,

Yiming Li,

Heng Huang,

Muhao Chen,

Zhengzhong Tu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zilan and Guo, Junfeng and Zhu, Jiacheng and Li, Yiming and Huang, Heng and Chen, Muhao and Tu, Zhengzhong},
  title     = {{SleeperMark}: Towards Robust Watermark against Fine-Tuning Text-to-image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8213--8224},
}
Lessons and Insights from a Unifying Study of Parameter-Efficient Fine-Tuning (PEFT) in Visual Recognition: Zheda Mai,

Ping Zhang,

Cheng-Hao Tu,

Hong-You Chen,

Quang-Huy Nguyen,

Li Zhang,

Wei-Lun Chao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mai_2025_CVPR,
  author    = {Mai, Zheda and Zhang, Ping and Tu, Cheng-Hao and Chen, Hong-You and Nguyen, Quang-Huy and Zhang, Li and Chao, Wei-Lun},
  title     = {Lessons and Insights from a Unifying Study of Parameter-Efficient Fine-Tuning ({PEFT}) in Visual Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14845--14857},
}
Pippo: High-Resolution Multi-View Humans from a Single Image: Yash Kant,

Ethan Weber,

Jin Kyu Kim,

Rawal Khirodkar,

Su Zhaoen,

Julieta Martinez,

Igor Gilitschenski,

Shunsuke Saito,

Timur Bagautdinov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kant_2025_CVPR,
  author    = {Kant, Yash and Weber, Ethan and Kim, Jin Kyu and Khirodkar, Rawal and Zhaoen, Su and Martinez, Julieta and Gilitschenski, Igor and Saito, Shunsuke and Bagautdinov, Timur},
  title     = {Pippo: High-Resolution Multi-View Humans from a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16418--16429},
}
H2ST: Hierarchical Two-Sample Tests for Continual Out-of-Distribution Detection: Yuhang Liu,

Wenjie Zhao,

Yunhui Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yuhang and Zhao, Wenjie and Guo, Yunhui},
  title     = {{H2ST}: Hierarchical Two-Sample Tests for Continual Out-of-Distribution Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15413--15423},
}
MetaWriter: Personalized Handwritten Text Recognition Using Meta-Learned Prompt Tuning: Wenhao Gu,

Li Gu,

Chingyee Yee Suen,

Yang Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Gu_2025_CVPR,
  author    = {Gu, Wenhao and Gu, Li and Suen, Chingyee Yee and Wang, Yang},
  title     = {{MetaWriter}: Personalized Handwritten Text Recognition Using Meta-Learned Prompt Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23494--23504},
}
Subnet-Aware Dynamic Supernet Training for Neural Architecture Search: Jeimin Jeon,

Youngmin Oh,

Junghyup Lee,

Donghyeon Baek,

Dohyung Kim,

Chanho Eom,

Bumsub Ham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeon_2025_CVPR,
  author    = {Jeon, Jeimin and Oh, Youngmin and Lee, Junghyup and Baek, Donghyeon and Kim, Dohyung and Eom, Chanho and Ham, Bumsub},
  title     = {Subnet-Aware Dynamic Supernet Training for Neural Architecture Search},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30137--30146},
}
MoVE-KD: Knowledge Distillation for VLMs with Mixture of Visual Encoders: Jiajun Cao,

Yuan Zhang,

Tao Huang,

Ming Lu,

Qizhe Zhang,

Ruichuan An,

Ningning Ma,

Shanghang Zhang; [pdf]
[bibtex]
@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Jiajun and Zhang, Yuan and Huang, Tao and Lu, Ming and Zhang, Qizhe and An, Ruichuan and Ma, Ningning and Zhang, Shanghang},
  title     = {{MoVE-KD}: Knowledge Distillation for {VLMs} with Mixture of Visual Encoders},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19846--19856},
}
CamFreeDiff: Camera-free Image to Panorama Generation with Diffusion Model: Xiaoding Yuan,

Shitao Tang,

Kejie Li,

Peng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Xiaoding and Tang, Shitao and Li, Kejie and Wang, Peng},
  title     = {{CamFreeDiff}: Camera-free Image to Panorama Generation with Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16408--16417},
}
Improving Visual and Downstream Performance of Low-Light Enhancer with Vision Foundation Models Collaboration: Yuxuan Gu,

Haoxuan Wang,

Pengyang Ling,

Zhixiang Wei,

Huaian Chen,

Yi Jin,

Enhong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Gu_2025_CVPR,
  author    = {Gu, Yuxuan and Wang, Haoxuan and Ling, Pengyang and Wei, Zhixiang and Chen, Huaian and Jin, Yi and Chen, Enhong},
  title     = {Improving Visual and Downstream Performance of Low-Light Enhancer with Vision Foundation Models Collaboration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16071--16080},
}
EchoWorld: Learning Motion-Aware World Models for Echocardiography Probe Guidance: Yang Yue,

Yulin Wang,

Haojun Jiang,

Pan Liu,

Shiji Song,

Gao Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yue_2025_CVPR,
  author    = {Yue, Yang and Wang, Yulin and Jiang, Haojun and Liu, Pan and Song, Shiji and Huang, Gao},
  title     = {{EchoWorld}: Learning Motion-Aware World Models for Echocardiography Probe Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25993--26003},
}
Controllable Human Image Generation with Personalized Multi-Garments: Yisol Choi,

Sangkyung Kwak,

Sihyun Yu,

Hyungwon Choi,

Jinwoo Shin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2025_CVPR,
  author    = {Choi, Yisol and Kwak, Sangkyung and Yu, Sihyun and Choi, Hyungwon and Shin, Jinwoo},
  title     = {Controllable Human Image Generation with Personalized Multi-Garments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28736--28747},
}
FineLIP: Extending CLIP's Reach via Fine-Grained Alignment with Longer Text Inputs: Mothilal Asokan,

Kebin Wu,

Fatima Albreiki; [pdf] [supp]
[bibtex]
@InProceedings{Asokan_2025_CVPR,
  author    = {Asokan, Mothilal and Wu, Kebin and Albreiki, Fatima},
  title     = {{FineLIP}: Extending {CLIP's} Reach via Fine-Grained Alignment with Longer Text Inputs},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14495--14504},
}
Illumination Spectrum Estimation for Multispectral Images via Surface Reflectance Modeling and Spatial-Spectral Feature Generation: Hyejin Oh,

Woo-Shik Kim,

Sangyoon Lee,

YungKyung Park,

Je-Won Kang; [pdf] [supp]
[bibtex]
@InProceedings{Oh_2025_CVPR,
  author    = {Oh, Hyejin and Kim, Woo-Shik and Lee, Sangyoon and Park, YungKyung and Kang, Je-Won},
  title     = {Illumination Spectrum Estimation for Multispectral Images via Surface Reflectance Modeling and Spatial-Spectral Feature Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2215--2225},
}
UHD-processer: Unified UHD Image Restoration with Progressive Frequency Learning and Degradation-aware Prompts: Yidi Liu,

Dong Li,

Xueyang Fu,

Xin Lu,

Jie Huang,

Zheng-Jun Zha; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yidi and Li, Dong and Fu, Xueyang and Lu, Xin and Huang, Jie and Zha, Zheng-Jun},
  title     = {{UHD-processer}: Unified {UHD} Image Restoration with Progressive Frequency Learning and Degradation-aware Prompts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23121--23130},
}
Divot: Diffusion Powers Video Tokenizer for Comprehension and Generation: Yuying Ge,

Yizhuo Li,

Yixiao Ge,

Ying Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2025_CVPR,
  author    = {Ge, Yuying and Li, Yizhuo and Ge, Yixiao and Shan, Ying},
  title     = {Divot: Diffusion Powers Video Tokenizer for Comprehension and Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13606--13617},
}
Towards Zero-Shot Anomaly Detection and Reasoning with Multimodal Large Language Models: Jiacong Xu,

Shao-Yuan Lo,

Bardia Safaei,

Vishal M. Patel,

Isht Dwivedi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Jiacong and Lo, Shao-Yuan and Safaei, Bardia and Patel, Vishal M. and Dwivedi, Isht},
  title     = {Towards Zero-Shot Anomaly Detection and Reasoning with Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20370--20382},
}
Towards Explainable and Unprecedented Accuracy in Matching Challenging Finger Crease Patterns: Zhenyu Zhou,

Chengdong Dong,

Ajay Kumar; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Zhenyu and Dong, Chengdong and Kumar, Ajay},
  title     = {Towards Explainable and Unprecedented Accuracy in Matching Challenging Finger Crease Patterns},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6212--6221},
}
Neural Hierarchical Decomposition for Single Image Plant Modeling: Zhihao Liu,

Zhanglin Cheng,

Naoto Yokoya; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhihao and Cheng, Zhanglin and Yokoya, Naoto},
  title     = {Neural Hierarchical Decomposition for Single Image Plant Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {733--742},
}
GBC-Splat: Generalizable Gaussian-Based Clothed Human Digitalization under Sparse RGB Cameras: Hanzhang Tu,

Zhanfeng Liao,

Boyao Zhou,

Shunyuan Zheng,

Xilong Zhou,

Liuxin Zhang,

QianYing Wang,

Yebin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Tu_2025_CVPR,
  author    = {Tu, Hanzhang and Liao, Zhanfeng and Zhou, Boyao and Zheng, Shunyuan and Zhou, Xilong and Zhang, Liuxin and Wang, QianYing and Liu, Yebin},
  title     = {{GBC-Splat}: Generalizable {Gaussian-Based} Clothed Human Digitalization under Sparse {RGB} Cameras},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26377--26387},
}
AC3D: Analyzing and Improving 3D Camera Control in Video Diffusion Transformers: Sherwin Bahmani,

Ivan Skorokhodov,

Guocheng Qian,

Aliaksandr Siarohin,

Willi Menapace,

Andrea Tagliasacchi,

David B. Lindell,

Sergey Tulyakov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bahmani_2025_CVPR,
  author    = {Bahmani, Sherwin and Skorokhodov, Ivan and Qian, Guocheng and Siarohin, Aliaksandr and Menapace, Willi and Tagliasacchi, Andrea and Lindell, David B. and Tulyakov, Sergey},
  title     = {{AC3D}: Analyzing and Improving {3D} Camera Control in Video Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22875--22889},
}
A Unified Model for Compressed Sensing MRI Across Undersampling Patterns: Armeet Singh Jatyani,

Jiayun Wang,

Aditi Chandrashekar,

Zihui Wu,

Miguel Liu-Schiaffini,

Bahareh Tolooshams,

Anima Anandkumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jatyani_2025_CVPR,
  author    = {Jatyani, Armeet Singh and Wang, Jiayun and Chandrashekar, Aditi and Wu, Zihui and Liu-Schiaffini, Miguel and Tolooshams, Bahareh and Anandkumar, Anima},
  title     = {A Unified Model for Compressed Sensing {MRI} Across Undersampling Patterns},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26004--26013},
}
Video-Guided Foley Sound Generation with Multimodal Controls: Ziyang Chen,

Prem Seetharaman,

Bryan Russell,

Oriol Nieto,

David Bourgin,

Andrew Owens,

Justin Salamon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Ziyang and Seetharaman, Prem and Russell, Bryan and Nieto, Oriol and Bourgin, David and Owens, Andrew and Salamon, Justin},
  title     = {Video-Guided {Foley} Sound Generation with Multimodal Controls},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18770--18781},
}
Dual-Agent Optimization framework for Cross-Domain Few-Shot Segmentation: Zhaoyang Li,

Yuan Wang,

Wangkai Li,

Tianzhu Zhang,

Xiang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zhaoyang and Wang, Yuan and Li, Wangkai and Zhang, Tianzhu and Liu, Xiang},
  title     = {Dual-Agent Optimization framework for Cross-Domain Few-Shot Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9849--9859},
}
SACB-Net: Spatial-awareness Convolutions for Medical Image Registration: Xinxing Cheng,

Tianyang Zhang,

Wenqi Lu,

Qingjie Meng,

Alejandro F. Frangi,

Jinming Duan; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Xinxing and Zhang, Tianyang and Lu, Wenqi and Meng, Qingjie and Frangi, Alejandro F. and Duan, Jinming},
  title     = {{SACB-Net}: Spatial-awareness Convolutions for Medical Image Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5227--5237},
}
Text Embedding is Not All You Need: Attention Control for Text-to-Image Semantic Alignment with Text Self-Attention Maps: Jeeyung Kim,

Erfan Esmaeili,

Qiang Qiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Jeeyung and Esmaeili, Erfan and Qiu, Qiang},
  title     = {Text Embedding is Not All You Need: Attention Control for Text-to-Image Semantic Alignment with Text Self-Attention Maps},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8031--8040},
}
DCEvo: Discriminative Cross-Dimensional Evolutionary Learning for Infrared and Visible Image Fusion: Jinyuan Liu,

Bowei Zhang,

Qingyun Mei,

Xingyuan Li,

Yang Zou,

Zhiying Jiang,

Long Ma,

Risheng Liu,

Xin Fan; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jinyuan and Zhang, Bowei and Mei, Qingyun and Li, Xingyuan and Zou, Yang and Jiang, Zhiying and Ma, Long and Liu, Risheng and Fan, Xin},
  title     = {{DCEvo}: Discriminative Cross-Dimensional Evolutionary Learning for Infrared and Visible Image Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2226--2235},
}
TSD-SR: One-Step Diffusion with Target Score Distillation for Real-World Image Super-Resolution: Linwei Dong,

Qingnan Fan,

Yihong Guo,

Zhonghao Wang,

Qi Zhang,

Jinwei Chen,

Yawei Luo,

Changqing Zou; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Linwei and Fan, Qingnan and Guo, Yihong and Wang, Zhonghao and Zhang, Qi and Chen, Jinwei and Luo, Yawei and Zou, Changqing},
  title     = {{TSD-SR}: One-Step Diffusion with Target Score Distillation for Real-World Image Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23174--23184},
}
AIpparel: A Multimodal Foundation Model for Digital Garments: Kiyohiro Nakayama,

Jan Ackermann,

Timur Levent Kesdogan,

Yang Zheng,

Maria Korosteleva,

Olga Sorkine-Hornung,

Leonidas J. Guibas,

Guandao Yang,

Gordon Wetzstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nakayama_2025_CVPR,
  author    = {Nakayama, Kiyohiro and Ackermann, Jan and Kesdogan, Timur Levent and Zheng, Yang and Korosteleva, Maria and Sorkine-Hornung, Olga and Guibas, Leonidas J. and Yang, Guandao and Wetzstein, Gordon},
  title     = {{AIpparel}: A Multimodal Foundation Model for Digital Garments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8138--8149},
}
Fast3R: Towards 3D Reconstruction of 1000+ Images in One Forward Pass: Jianing Yang,

Alexander Sax,

Kevin J. Liang,

Mikael Henaff,

Hao Tang,

Ang Cao,

Joyce Chai,

Franziska Meier,

Matt Feiszli; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jianing and Sax, Alexander and Liang, Kevin J. and Henaff, Mikael and Tang, Hao and Cao, Ang and Chai, Joyce and Meier, Franziska and Feiszli, Matt},
  title     = {{Fast3R}: Towards {3D} Reconstruction of 1000+ Images in One Forward Pass},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21924--21935},
}
StyleStudio: Text-Driven Style Transfer with Selective Control of Style Elements: Mingkun Lei,

Xue Song,

Beier Zhu,

Hao Wang,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2025_CVPR,
  author    = {Lei, Mingkun and Song, Xue and Zhu, Beier and Wang, Hao and Zhang, Chi},
  title     = {{StyleStudio}: Text-Driven Style Transfer with Selective Control of Style Elements},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23443--23452},
}
CTRL-O: Language-Controllable Object-Centric Visual Representation Learning: Aniket Didolkar,

Andrii Zadaianchuk,

Rabiul Awal,

Maximilian Seitzer,

Efstratios Gavves,

Aishwarya Agrawal; [pdf] [supp]
[bibtex]
@InProceedings{Didolkar_2025_CVPR,
  author    = {Didolkar, Aniket and Zadaianchuk, Andrii and Awal, Rabiul and Seitzer, Maximilian and Gavves, Efstratios and Agrawal, Aishwarya},
  title     = {{CTRL-O}: Language-Controllable Object-Centric Visual Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29523--29533},
}
PO3AD: Predicting Point Offsets toward Better 3D Point Cloud Anomaly Detection: Jianan Ye,

Weiguang Zhao,

Xi Yang,

Guangliang Cheng,

Kaizhu Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Jianan and Zhao, Weiguang and Yang, Xi and Cheng, Guangliang and Huang, Kaizhu},
  title     = {{PO3AD}: Predicting Point Offsets toward Better {3D} Point Cloud Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1353--1362},
}
Text Augmented Correlation Transformer For Few-shot Classification & Segmentation: Srinivasa Rao Nandam,

Sara Atito,

Zhenhua Feng,

Josef Kittler,

Muhammad Awais; [pdf] [supp]
[bibtex]
@InProceedings{Nandam_2025_CVPR,
  author    = {Nandam, Srinivasa Rao and Atito, Sara and Feng, Zhenhua and Kittler, Josef and Awais, Muhammad},
  title     = {Text Augmented Correlation Transformer For Few-shot Classification \& Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25357--25366},
}
F^3OCUS - Federated Finetuning of Vision-Language Foundation Models with Optimal Client Layer Updating Strategy via Multi-objective Meta-Heuristics: Pramit Saha,

Felix Wagner,

Divyanshu Mishra,

Can Peng,

Anshul Thakur,

David A. Clifton,

Konstantinos Kamnitsas,

J. Alison Noble; [pdf] [supp]
[bibtex]
@InProceedings{Saha_2025_CVPR,
  author    = {Saha, Pramit and Wagner, Felix and Mishra, Divyanshu and Peng, Can and Thakur, Anshul and Clifton, David A. and Kamnitsas, Konstantinos and Noble, J. Alison},
  title     = {{F{\textasciicircum}3OCUS} - Federated Finetuning of Vision-Language Foundation Models with Optimal Client Layer Updating Strategy via Multi-objective Meta-Heuristics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20006--20017},
}
ICT: Image-Object Cross-Level Trusted Intervention for Mitigating Object Hallucination in Large Vision-Language Models: Junzhe Chen,

Tianshu Zhang,

Shiyu Huang,

Yuwei Niu,

Linfeng Zhang,

Lijie Wen,

Xuming Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Junzhe and Zhang, Tianshu and Huang, Shiyu and Niu, Yuwei and Zhang, Linfeng and Wen, Lijie and Hu, Xuming},
  title     = {{ICT}: Image-Object Cross-Level Trusted Intervention for Mitigating Object Hallucination in Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4209--4221},
}
PreciseCam: Precise Camera Control for Text-to-Image Generation: Edurne Bernal-Berdun,

Ana Serrano,

Belen Masia,

Matheus Gadelha,

Yannick Hold-Geoffroy,

Xin Sun,

Diego Gutierrez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bernal-Berdun_2025_CVPR,
  author    = {Bernal-Berdun, Edurne and Serrano, Ana and Masia, Belen and Gadelha, Matheus and Hold-Geoffroy, Yannick and Sun, Xin and Gutierrez, Diego},
  title     = {{PreciseCam}: Precise Camera Control for Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2724--2733},
}
3D Occupancy Prediction with Low-Resolution Queries via Prototype-aware View Transformation: Gyeongrok Oh,

Sungjune Kim,

Heeju Ko,

Hyung-gun Chi,

Jinkyu Kim,

Dongwook Lee,

Daehyun Ji,

Sungjoon Choi,

Sujin Jang,

Sangpil Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Oh_2025_CVPR,
  author    = {Oh, Gyeongrok and Kim, Sungjune and Ko, Heeju and Chi, Hyung-gun and Kim, Jinkyu and Lee, Dongwook and Ji, Daehyun and Choi, Sungjoon and Jang, Sujin and Kim, Sangpil},
  title     = {{3D} Occupancy Prediction with Low-Resolution Queries via Prototype-aware View Transformation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17134--17144},
}
Unified Dense Prediction of Video Diffusion: Lehan Yang,

Lu Qi,

Xiangtai Li,

Sheng Li,

Varun Jampani,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Lehan and Qi, Lu and Li, Xiangtai and Li, Sheng and Jampani, Varun and Yang, Ming-Hsuan},
  title     = {Unified Dense Prediction of Video Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28963--28973},
}
Can Large Vision-Language Models Correct Semantic Grounding Errors By Themselves?: Yuan-Hong Liao,

Rafid Mahmood,

Sanja Fidler,

David Acuna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_CVPR,
  author    = {Liao, Yuan-Hong and Mahmood, Rafid and Fidler, Sanja and Acuna, David},
  title     = {Can Large Vision-Language Models Correct Semantic Grounding Errors By Themselves?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14667--14678},
}
SET: Spectral Enhancement for Tiny Object Detection: Huixin Sun,

Runqi Wang,

Yanjing Li,

Linlin Yang,

Shaohui Lin,

Xianbin Cao,

Baochang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Huixin and Wang, Runqi and Li, Yanjing and Yang, Linlin and Lin, Shaohui and Cao, Xianbin and Zhang, Baochang},
  title     = {{SET}: Spectral Enhancement for Tiny Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4713--4723},
}
g3D-LF: Generalizable 3D-Language Feature Fields for Embodied Tasks: Zihan Wang,

Gim Hee Lee; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zihan and Lee, Gim Hee},
  title     = {{g3D-LF}: Generalizable {3D-Language} Feature Fields for Embodied Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14191--14202},
}
Towards Million-Scale Adversarial Robustness Evaluation With Stronger Individual Attacks: Yong Xie,

Weijie Zheng,

Hanxun Huang,

Guangnan Ye,

Xingjun Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Yong and Zheng, Weijie and Huang, Hanxun and Ye, Guangnan and Ma, Xingjun},
  title     = {Towards Million-Scale Adversarial Robustness Evaluation With Stronger Individual Attacks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30702--30711},
}
Temporal Action Detection Model Compression by Progressive Block Drop: Xiaoyong Chen,

Yong Guo,

Jiaming Liang,

Sitong Zhuang,

Runhao Zeng,

Xiping Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Xiaoyong and Guo, Yong and Liang, Jiaming and Zhuang, Sitong and Zeng, Runhao and Hu, Xiping},
  title     = {Temporal Action Detection Model Compression by Progressive Block Drop},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29225--29236},
}
Differentiable Inverse Rendering with Interpretable Basis BRDFs: Hoon-Gyu Chung,

Seokjun Choi,

Seung-Hwan Baek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chung_2025_CVPR,
  author    = {Chung, Hoon-Gyu and Choi, Seokjun and Baek, Seung-Hwan},
  title     = {Differentiable Inverse Rendering with Interpretable Basis {BRDFs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {475--484},
}
EquiPose: Exploiting Permutation Equivariance for Relative Camera Pose Estimation: Yuzhen Liu,

Qiulei Dong; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yuzhen and Dong, Qiulei},
  title     = {{EquiPose}: Exploiting Permutation Equivariance for Relative Camera Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1127--1137},
}
Face Forgery Video Detection via Temporal Forgery Cue Unraveling: Zonghui Guo,

Yingjie Liu,

Jie Zhang,

Haiyong Zheng,

Shiguang Shan; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Zonghui and Liu, Yingjie and Zhang, Jie and Zheng, Haiyong and Shan, Shiguang},
  title     = {Face Forgery Video Detection via Temporal Forgery Cue Unraveling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7396--7405},
}
Temporally Consistent Object-Centric Learning by Contrasting Slots: Anna Manasyan,

Maximilian Seitzer,

Filip Radovic,

Georg Martius,

Andrii Zadaianchuk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Manasyan_2025_CVPR,
  author    = {Manasyan, Anna and Seitzer, Maximilian and Radovic, Filip and Martius, Georg and Zadaianchuk, Andrii},
  title     = {Temporally Consistent Object-Centric Learning by Contrasting Slots},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5401--5411},
}
MC^2: Multi-concept Guidance for Customized Multi-concept Generation: Jiaxiu Jiang,

Yabo Zhang,

Kailai Feng,

Xiaohe Wu,

Wenbo Li,

Renjing Pei,

Fan Li,

Wangmeng Zuo; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Jiaxiu and Zhang, Yabo and Feng, Kailai and Wu, Xiaohe and Li, Wenbo and Pei, Renjing and Li, Fan and Zuo, Wangmeng},
  title     = {{MC{\textasciicircum}2}: Multi-concept Guidance for Customized Multi-concept Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2802--2812},
}
UniReal: Universal Image Generation and Editing via Learning Real-world Dynamics: Xi Chen,

Zhifei Zhang,

He Zhang,

Yuqian Zhou,

Soo Ye Kim,

Qing Liu,

Yijun Li,

Jianming Zhang,

Nanxuan Zhao,

Yilin Wang,

Hui Ding,

Zhe Lin,

Hengshuang Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Xi and Zhang, Zhifei and Zhang, He and Zhou, Yuqian and Kim, Soo Ye and Liu, Qing and Li, Yijun and Zhang, Jianming and Zhao, Nanxuan and Wang, Yilin and Ding, Hui and Lin, Zhe and Zhao, Hengshuang},
  title     = {{UniReal}: Universal Image Generation and Editing via Learning Real-world Dynamics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12501--12511},
}
Pursuing Temporal-Consistent Video Virtual Try-On via Dynamic Pose Interaction: Dong Li,

Wenqi Zhong,

Wei Yu,

Yingwei Pan,

Dingwen Zhang,

Ting Yao,

Junwei Han,

Tao Mei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Dong and Zhong, Wenqi and Yu, Wei and Pan, Yingwei and Zhang, Dingwen and Yao, Ting and Han, Junwei and Mei, Tao}, title = {Pursuing Temporal-Consistent Video Virtual Try-On via Dynamic Pose Interaction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22648--22657} }
Exploring Contextual Attribute Density in Referring Expression Counting: Zhicheng Wang,

Zhiyu Pan,

Zhan Peng,

Jian Cheng,

Liwen Xiao,

Wei Jiang,

Zhiguo Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Zhicheng and Pan, Zhiyu and Peng, Zhan and Cheng, Jian and Xiao, Liwen and Jiang, Wei and Cao, Zhiguo}, title = {Exploring Contextual Attribute Density in Referring Expression Counting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19587--19596} }
DINOv2 Meets Text: A Unified Framework for Image- and Pixel-Level Vision-Language Alignment: Cijo Jose,

Théo Moutakanni,

Dahyun Kang,

Federico Baldassarre,

Timothée Darcet,

Hu Xu,

Daniel Li,

Marc Szafraniec,

Michaël Ramamonjisoa,

Maxime Oquab,

Oriane Siméoni,

Huy V. Vo,

Patrick Labatut,

Piotr Bojanowski; [pdf] [supp]
[bibtex]
@InProceedings{Jose_2025_CVPR, author = {Jose, Cijo and Moutakanni, Th{\'e}o and Kang, Dahyun and Baldassarre, Federico and Darcet, Timoth{\'e}e and Xu, Hu and Li, Daniel and Szafraniec, Marc and Ramamonjisoa, Micha{\"e}l and Oquab, Maxime and Sim{\'e}oni, Oriane and Vo, Huy V. and Labatut, Patrick and Bojanowski, Piotr}, title = {{DINOv2} Meets Text: A Unified Framework for Image- and Pixel-Level Vision-Language Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24905--24916} }
Learning Affine Correspondences by Integrating Geometric Constraints: Pengju Sun,

Banglei Guan,

Zhenbao Yu,

Yang Shang,

Qifeng Yu,

Daniel Barath; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR, author = {Sun, Pengju and Guan, Banglei and Yu, Zhenbao and Shang, Yang and Yu, Qifeng and Barath, Daniel}, title = {Learning Affine Correspondences by Integrating Geometric Constraints}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27038--27048} }
UCOD-DPL: Unsupervised Camouflaged Object Detection via Dynamic Pseudo-label Learning: Weiqi Yan,

Lvhai Chen,

Huaijia Kou,

Shengchuan Zhang,

Yan Zhang,

Liujuan Cao; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2025_CVPR, author = {Yan, Weiqi and Chen, Lvhai and Kou, Huaijia and Zhang, Shengchuan and Zhang, Yan and Cao, Liujuan}, title = {{UCOD-DPL}: Unsupervised Camouflaged Object Detection via Dynamic Pseudo-label Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30365--30375} }
Geometry in Style: 3D Stylization via Surface Normal Deformation: Nam Anh Dinh,

Itai Lang,

Hyunwoo Kim,

Oded Stein,

Rana Hanocka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dinh_2025_CVPR, author = {Dinh, Nam Anh and Lang, Itai and Kim, Hyunwoo and Stein, Oded and Hanocka, Rana}, title = {Geometry in Style: {3D} Stylization via Surface Normal Deformation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28456--28467} }
Multi-modal Vision Pre-training for Medical Image Analysis: Shaohao Rui,

Lingzhi Chen,

Zhenyu Tang,

Lilong Wang,

Mianxin Liu,

Shaoting Zhang,

Xiaosong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rui_2025_CVPR, author = {Rui, Shaohao and Chen, Lingzhi and Tang, Zhenyu and Wang, Lilong and Liu, Mianxin and Zhang, Shaoting and Wang, Xiaosong}, title = {Multi-modal Vision Pre-training for Medical Image Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5164--5174} }
SegMAN: Omni-scale Context Modeling with State Space Models and Local Attention for Semantic Segmentation: Yunxiang Fu,

Meng Lou,

Yizhou Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_CVPR, author = {Fu, Yunxiang and Lou, Meng and Yu, Yizhou}, title = {{SegMAN}: Omni-scale Context Modeling with State Space Models and Local Attention for Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19077--19087} }
STEP: Enhancing Video-LLMs' Compositional Reasoning by Spatio-Temporal Graph-guided Self-Training: Haiyi Qiu,

Minghe Gao,

Long Qian,

Kaihang Pan,

Qifan Yu,

Juncheng Li,

Wenjie Wang,

Siliang Tang,

Yueting Zhuang,

Tat-Seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2025_CVPR, author = {Qiu, Haiyi and Gao, Minghe and Qian, Long and Pan, Kaihang and Yu, Qifan and Li, Juncheng and Wang, Wenjie and Tang, Siliang and Zhuang, Yueting and Chua, Tat-Seng}, title = {{STEP}: Enhancing {Video-LLMs}' Compositional Reasoning by Spatio-Temporal Graph-guided Self-Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {3284--3294} }
OmniFlow: Any-to-Any Generation with Multi-Modal Rectified Flows: Shufan Li,

Konstantinos Kallidromitis,

Akash Gokul,

Zichun Liao,

Yusuke Kato,

Kazuki Kozuka,

Aditya Grover; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Shufan and Kallidromitis, Konstantinos and Gokul, Akash and Liao, Zichun and Kato, Yusuke and Kozuka, Kazuki and Grover, Aditya}, title = {{OmniFlow}: Any-to-Any Generation with Multi-Modal Rectified Flows}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13178--13188} }
PVC: Progressive Visual Token Compression for Unified Image and Video Processing in Large Vision-Language Models: Chenyu Yang,

Xuan Dong,

Xizhou Zhu,

Weijie Su,

Jiahao Wang,

Hao Tian,

Zhe Chen,

Wenhai Wang,

Lewei Lu,

Jifeng Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR, author = {Yang, Chenyu and Dong, Xuan and Zhu, Xizhou and Su, Weijie and Wang, Jiahao and Tian, Hao and Chen, Zhe and Wang, Wenhai and Lu, Lewei and Dai, Jifeng}, title = {{PVC}: Progressive Visual Token Compression for Unified Image and Video Processing in Large Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24939--24949} }
LIM: Large Interpolator Model for Dynamic Reconstruction: Remy Sabathier,

Niloy J. Mitra,

David Novotny; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sabathier_2025_CVPR, author = {Sabathier, Remy and Mitra, Niloy J. and Novotny, David}, title = {{LIM}: Large Interpolator Model for Dynamic Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6154--6164} }
Multiple Object Tracking as ID Prediction: Ruopeng Gao,

Ji Qi,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR, author = {Gao, Ruopeng and Qi, Ji and Wang, Limin}, title = {Multiple Object Tracking as {ID} Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27883--27893} }
AutoPresent: Designing Structured Visuals from Scratch: Jiaxin Ge,

Zora Zhiruo Wang,

Xuhui Zhou,

Yi-Hao Peng,

Sanjay Subramanian,

Qinyue Tan,

Maarten Sap,

Alane Suhr,

Daniel Fried,

Graham Neubig,

Trevor Darrell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2025_CVPR, author = {Ge, Jiaxin and Wang, Zora Zhiruo and Zhou, Xuhui and Peng, Yi-Hao and Subramanian, Sanjay and Tan, Qinyue and Sap, Maarten and Suhr, Alane and Fried, Daniel and Neubig, Graham and Darrell, Trevor}, title = {{AutoPresent}: Designing Structured Visuals from Scratch}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2902--2911} }
SLAM3R: Real-Time Dense Scene Reconstruction from Monocular RGB Videos: Yuzheng Liu,

Siyan Dong,

Shuzhe Wang,

Yingda Yin,

Yanchao Yang,

Qingnan Fan,

Baoquan Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Yuzheng and Dong, Siyan and Wang, Shuzhe and Yin, Yingda and Yang, Yanchao and Fan, Qingnan and Chen, Baoquan}, title = {{SLAM3R}: Real-Time Dense Scene Reconstruction from Monocular {RGB} Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16651--16662} }
PIDLoc: Cross-View Pose Optimization Network Inspired by PID Controllers: Wooju Lee,

Juhye Park,

Dasol Hong,

Changki Sung,

Youngwoo Seo,

DongWan Kang,

Hyun Myung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR, author = {Lee, Wooju and Park, Juhye and Hong, Dasol and Sung, Changki and Seo, Youngwoo and Kang, DongWan and Myung, Hyun}, title = {{PIDLoc}: Cross-View Pose Optimization Network Inspired by {PID} Controllers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21981--21990} }
VisionArena: 230k Real World User-VLM Conversations with Preference Labels: Christopher Chou,

Lisa Dunlap,

Koki Mashita,

Krishna Mandal,

Trevor Darrell,

Ion Stoica,

Joseph E. Gonzalez,

Wei-Lin Chiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chou_2025_CVPR, author = {Chou, Christopher and Dunlap, Lisa and Mashita, Koki and Mandal, Krishna and Darrell, Trevor and Stoica, Ion and Gonzalez, Joseph E. and Chiang, Wei-Lin}, title = {{VisionArena}: 230k Real World {User-VLM} Conversations with Preference Labels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {3877--3887} }
FAM Diffusion: Frequency and Attention Modulation for High-Resolution Image Generation with Stable Diffusion: Haosen Yang,

Adrian Bulat,

Isma Hadji,

Hai X. Pham,

Xiatian Zhu,

Georgios Tzimiropoulos,

Brais Martinez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR, author = {Yang, Haosen and Bulat, Adrian and Hadji, Isma and Pham, Hai X. and Zhu, Xiatian and Tzimiropoulos, Georgios and Martinez, Brais}, title = {{FAM} Diffusion: Frequency and Attention Modulation for High-Resolution Image Generation with {Stable Diffusion}}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2459--2468} }
DreamOmni: Unified Image Generation and Editing: Bin Xia,

Yuechen Zhang,

Jingyao Li,

Chengyao Wang,

Yitong Wang,

Xinglong Wu,

Bei Yu,

Jiaya Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_CVPR, author = {Xia, Bin and Zhang, Yuechen and Li, Jingyao and Wang, Chengyao and Wang, Yitong and Wu, Xinglong and Yu, Bei and Jia, Jiaya}, title = {{DreamOmni}: Unified Image Generation and Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28533--28543} }
Hash3D: Training-free Acceleration for 3D Generation: Xingyi Yang,

Songhua Liu,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR, author = {Yang, Xingyi and Liu, Songhua and Wang, Xinchao}, title = {{Hash3D}: Training-free Acceleration for {3D} Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21481--21491} }
SemGeoMo: Dynamic Contextual Human Motion Generation with Semantic and Geometric Guidance: Peishan Cong,

Ziyi Wang,

Yuexin Ma,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cong_2025_CVPR, author = {Cong, Peishan and Wang, Ziyi and Ma, Yuexin and Yue, Xiangyu}, title = {{SemGeoMo}: Dynamic Contextual Human Motion Generation with Semantic and Geometric Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17561--17570} }
MultiGO: Towards Multi-level Geometry Learning for Monocular 3D Textured Human Reconstruction: Gangjian Zhang,

Nanjie Yao,

Shunsi Zhang,

Hanfeng Zhao,

Guoliang Pang,

Jian Shu,

Hao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Gangjian and Yao, Nanjie and Zhang, Shunsi and Zhao, Hanfeng and Pang, Guoliang and Shu, Jian and Wang, Hao}, title = {{MultiGO}: Towards Multi-level Geometry Learning for Monocular {3D} Textured Human Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {338--347} }
Generative Photomontage: Sean J. Liu,

Nupur Kumari,

Ariel Shamir,

Jun-Yan Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Sean J. and Kumari, Nupur and Shamir, Ariel and Zhu, Jun-Yan}, title = {Generative Photomontage}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7931--7941} }
Multi-view Reconstruction via SfM-guided Monocular Depth Estimation: Haoyu Guo,

He Zhu,

Sida Peng,

Haotong Lin,

Yunzhi Yan,

Tao Xie,

Wenguan Wang,

Xiaowei Zhou,

Hujun Bao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR, author = {Guo, Haoyu and Zhu, He and Peng, Sida and Lin, Haotong and Yan, Yunzhi and Xie, Tao and Wang, Wenguan and Zhou, Xiaowei and Bao, Hujun}, title = {Multi-view Reconstruction via {SfM}-guided Monocular Depth Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5272--5282} }
Learning Hazing to Dehazing: Towards Realistic Haze Generation for Real-World Image Dehazing: Ruiyi Wang,

Yushuo Zheng,

Zicheng Zhang,

Chunyi Li,

Shuaicheng Liu,

Guangtao Zhai,

Xiaohong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Ruiyi and Zheng, Yushuo and Zhang, Zicheng and Li, Chunyi and Liu, Shuaicheng and Zhai, Guangtao and Liu, Xiaohong}, title = {Learning Hazing to Dehazing: Towards Realistic Haze Generation for Real-World Image Dehazing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23091--23100} }
HuMoCon: Concept Discovery for Human Motion Understanding: Qihang Fang,

Chengcheng Tang,

Bugra Tekin,

Shugao Ma,

Yanchao Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_CVPR, author = {Fang, Qihang and Tang, Chengcheng and Tekin, Bugra and Ma, Shugao and Yang, Yanchao}, title = {{HuMoCon}: Concept Discovery for Human Motion Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7179--7190} }
RUBIK: A Structured Benchmark for Image Matching across Geometric Challenges: Thibaut Loiseau,

Guillaume Bourmaud; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Loiseau_2025_CVPR, author = {Loiseau, Thibaut and Bourmaud, Guillaume}, title = {{RUBIK}: A Structured Benchmark for Image Matching across Geometric Challenges}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27070--27080} }
Fast and Accurate Gigapixel Pathological Image Classification with Hierarchical Distillation Multi-Instance Learning: Jiuyang Dong,

Junjun Jiang,

Kui Jiang,

Jiahan Li,

Yongbing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_CVPR, author = {Dong, Jiuyang and Jiang, Junjun and Jiang, Kui and Li, Jiahan and Zhang, Yongbing}, title = {Fast and Accurate Gigapixel Pathological Image Classification with Hierarchical Distillation Multi-Instance Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30818--30828} }
FreeScene: Mixed Graph Diffusion for 3D Scene Synthesis from Free Prompts: Tongyuan Bai,

Wangyuanfan Bai,

Dong Chen,

Tieru Wu,

Manyi Li,

Rui Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2025_CVPR, author = {Bai, Tongyuan and Bai, Wangyuanfan and Chen, Dong and Wu, Tieru and Li, Manyi and Ma, Rui}, title = {{FreeScene}: Mixed Graph Diffusion for {3D} Scene Synthesis from Free Prompts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5893--5903} }
Rethinking Correspondence-based Category-Level Object Pose Estimation: Huan Ren,

Wenfei Yang,

Shifeng Zhang,

Tianzhu Zhang; [pdf]
[bibtex]
@InProceedings{Ren_2025_CVPR, author = {Ren, Huan and Yang, Wenfei and Zhang, Shifeng and Zhang, Tianzhu}, title = {Rethinking Correspondence-based Category-Level Object Pose Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1170--1179} }
Curriculum Direct Preference Optimization for Diffusion and Consistency Models: Florinel-Alin Croitoru,

Vlad Hondru,

Radu Tudor Ionescu,

Nicu Sebe,

Mubarak Shah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Croitoru_2025_CVPR, author = {Croitoru, Florinel-Alin and Hondru, Vlad and Ionescu, Radu Tudor and Sebe, Nicu and Shah, Mubarak}, title = {Curriculum Direct Preference Optimization for Diffusion and Consistency Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2824--2834} }
IncEventGS: Pose-Free Gaussian Splatting from a Single Event Camera: Jian Huang,

Chengrui Dong,

Xuanhua Chen,

Peidong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR, author = {Huang, Jian and Dong, Chengrui and Chen, Xuanhua and Liu, Peidong}, title = {{IncEventGS}: Pose-Free {Gaussian} Splatting from a Single Event Camera}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26933--26942} }
OpenMIBOOD: Open Medical Imaging Benchmarks for Out-Of-Distribution Detection: Max Gutbrod,

David Rauber,

Danilo Weber Nunes,

Christoph Palm; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gutbrod_2025_CVPR, author = {Gutbrod, Max and Rauber, David and Nunes, Danilo Weber and Palm, Christoph}, title = {{OpenMIBOOD}: Open Medical Imaging Benchmarks for Out-Of-Distribution Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25874--25886} }
Detecting Open World Objects via Partial Attribute Assignment: Muli Yang,

Gabriel James Goenawan,

Huaiyuan Qin,

Kai Han,

Xi Peng,

Yanhua Yang,

Hongyuan Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR, author = {Yang, Muli and Goenawan, Gabriel James and Qin, Huaiyuan and Han, Kai and Peng, Xi and Yang, Yanhua and Zhu, Hongyuan}, title = {Detecting Open World Objects via Partial Attribute Assignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20318--20328} }
FactCheXcker: Mitigating Measurement Hallucinations in Chest X-ray Report Generation Models: Alice Heiman,

Xiaoman Zhang,

Emma Chen,

Sung Eun Kim,

Pranav Rajpurkar; [pdf] [supp]
[bibtex]
@InProceedings{Heiman_2025_CVPR, author = {Heiman, Alice and Zhang, Xiaoman and Chen, Emma and Kim, Sung Eun and Rajpurkar, Pranav}, title = {{FactCheXcker}: Mitigating Measurement Hallucinations in Chest {X-ray} Report Generation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30787--30796} }
Neural Inverse Rendering from Propagating Light: Anagh Malik,

Benjamin Attal,

Andrew Xie,

Matthew O'Toole,

David B. Lindell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Malik_2025_CVPR, author = {Malik, Anagh and Attal, Benjamin and Xie, Andrew and O'Toole, Matthew and Lindell, David B.}, title = {Neural Inverse Rendering from Propagating Light}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10534--10544} }
When the Future Becomes the Past: Taming Temporal Correspondence for Self-supervised Video Representation Learning: Yang Liu,

Qianqian Xu,

Peisong Wen,

Siran Dai,

Qingming Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Yang and Xu, Qianqian and Wen, Peisong and Dai, Siran and Huang, Qingming}, title = {When the Future Becomes the Past: Taming Temporal Correspondence for Self-supervised Video Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24033--24044} }
Personalized Preference Fine-tuning of Diffusion Models: Meihua Dang,

Anikait Singh,

Linqi Zhou,

Stefano Ermon,

Jiaming Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dang_2025_CVPR, author = {Dang, Meihua and Singh, Anikait and Zhou, Linqi and Ermon, Stefano and Song, Jiaming}, title = {Personalized Preference Fine-tuning of Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8020--8030} }
DecoupledGaussian: Object-Scene Decoupling for Physics-Based Interaction: Miaowei Wang,

Yibo Zhang,

Weiwei Xu,

Rui Ma,

Changqing Zou,

Daniel Morris; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Miaowei and Zhang, Yibo and Xu, Weiwei and Ma, Rui and Zou, Changqing and Morris, Daniel}, title = {{DecoupledGaussian}: Object-Scene Decoupling for Physics-Based Interaction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11361--11372} }
UniPose: A Unified Multimodal Framework for Human Pose Comprehension, Generation and Editing: Yiheng Li,

Ruibing Hou,

Hong Chang,

Shiguang Shan,

Xilin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Yiheng and Hou, Ruibing and Chang, Hong and Shan, Shiguang and Chen, Xilin}, title = {{UniPose}: A Unified Multimodal Framework for Human Pose Comprehension, Generation and Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27805--27815} }
POMP: Physics-consistent Motion Generative Model through Phase Manifolds: Bin Ji,

Ye Pan,

Zhimeng Liu,

Shuai Tan,

Xiaogang Jin,

Xiaokang Yang; [pdf] [supp]
[bibtex]
@InProceedings{Ji_2025_CVPR, author = {Ji, Bin and Pan, Ye and Liu, Zhimeng and Tan, Shuai and Jin, Xiaogang and Yang, Xiaokang}, title = {{POMP}: Physics-consistent Motion Generative Model through Phase Manifolds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22690--22701} }
NN-Former: Rethinking Graph Structure in Neural Architecture Representation: Ruihan Xu,

Haokui Zhang,

Yaowei Wang,

Wei Zeng,

Shiliang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR, author = {Xu, Ruihan and Zhang, Haokui and Wang, Yaowei and Zeng, Wei and Zhang, Shiliang}, title = {{NN-Former}: Rethinking Graph Structure in Neural Architecture Representation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10004--10014} }
DashGaussian: Optimizing 3D Gaussian Splatting in 200 Seconds: Youyu Chen,

Junjun Jiang,

Kui Jiang,

Xiao Tang,

Zhihao Li,

Xianming Liu,

Yinyu Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR, author = {Chen, Youyu and Jiang, Junjun and Jiang, Kui and Tang, Xiao and Li, Zhihao and Liu, Xianming and Nie, Yinyu}, title = {{DashGaussian}: Optimizing {3D} {Gaussian} Splatting in 200 Seconds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11146--11155} }
Reasoning to Attend: Try to Understand How <SEG> Token Works: Rui Qian,

Xin Yin,

Dejing Dou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qian_2025_CVPR, author = {Qian, Rui and Yin, Xin and Dou, Dejing}, title = {Reasoning to Attend: Try to Understand How \ensuremath{<}{SEG}\ensuremath{>} Token Works}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24722--24731} }
ReSpec: Relevance and Specificity Grounded Online Filtering for Learning on Video-Text Data Streams: Chris Dongjoo Kim,

Jihwan Moon,

Sangwoo Moon,

Heeseung Yun,

Sihaeng Lee,

Aniruddha Kembhavi,

Soonyoung Lee,

Gunhee Kim,

Sangho Lee,

Christopher Clark; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR, author = {Kim, Chris Dongjoo and Moon, Jihwan and Moon, Sangwoo and Yun, Heeseung and Lee, Sihaeng and Kembhavi, Aniruddha and Lee, Soonyoung and Kim, Gunhee and Lee, Sangho and Clark, Christopher}, title = {{ReSpec}: Relevance and Specificity Grounded Online Filtering for Learning on Video-Text Data Streams}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29040--29049} }
A Unified Image-Dense Annotation Generation Model for Underwater Scenes: Hongkai Lin,

Dingkang Liang,

Zhenghao Qi,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR, author = {Lin, Hongkai and Liang, Dingkang and Qi, Zhenghao and Bai, Xiang}, title = {A Unified Image-Dense Annotation Generation Model for Underwater Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {961--970} }
PACT: Pruning and Clustering-Based Token Reduction for Faster Visual Language Models: Mohamed Dhouib,

Davide Buscaldi,

Sonia Vanier,

Aymen Shabou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dhouib_2025_CVPR, author = {Dhouib, Mohamed and Buscaldi, Davide and Vanier, Sonia and Shabou, Aymen}, title = {{PACT}: Pruning and Clustering-Based Token Reduction for Faster Visual Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14582--14592} }
R-SCoRe: Revisiting Scene Coordinate Regression for Robust Large-Scale Visual Localization: Xudong Jiang,

Fangjinhua Wang,

Silvano Galliani,

Christoph Vogel,

Marc Pollefeys; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_CVPR, author = {Jiang, Xudong and Wang, Fangjinhua and Galliani, Silvano and Vogel, Christoph and Pollefeys, Marc}, title = {{R-SCoRe}: Revisiting Scene Coordinate Regression for Robust Large-Scale Visual Localization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11536--11546} }
DynRefer: Delving into Region-level Multimodal Tasks via Dynamic Resolution: Yuzhong Zhao,

Feng Liu,

Yue Liu,

Mingxiang Liao,

Chen Gong,

Qixiang Ye,

Fang Wan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR, author = {Zhao, Yuzhong and Liu, Feng and Liu, Yue and Liao, Mingxiang and Gong, Chen and Ye, Qixiang and Wan, Fang}, title = {{DynRefer}: Delving into Region-level Multimodal Tasks via Dynamic Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24742--24752} }
Playing the Fool: Jailbreaking LLMs and Multimodal LLMs with Out-of-Distribution Strategy: Joonhyun Jeong,

Seyun Bae,

Yeonsung Jung,

Jaeryong Hwang,

Eunho Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_CVPR, author = {Jeong, Joonhyun and Bae, Seyun and Jung, Yeonsung and Hwang, Jaeryong and Yang, Eunho}, title = {Playing the Fool: Jailbreaking {LLMs} and Multimodal {LLMs} with Out-of-Distribution Strategy}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29937--29946} }
Style Evolving along Chain-of-Thought for Unknown-Domain Object Detection: Zihao Zhang,

Aming Wu,

Yahong Han; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zihao and Wu, Aming and Han, Yahong}, title = {Style Evolving along Chain-of-Thought for Unknown-Domain Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14225--14234} }
NTR-Gaussian: Nighttime Dynamic Thermal Reconstruction with 4D Gaussian Splatting Based on Thermodynamics: Kun Yang,

Yuxiang Liu,

Zeyu Cui,

Yu Liu,

Maojun Zhang,

Shen Yan,

Qing Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR, author = {Yang, Kun and Liu, Yuxiang and Cui, Zeyu and Liu, Yu and Zhang, Maojun and Yan, Shen and Wang, Qing}, title = {{NTR-Gaussian}: Nighttime Dynamic Thermal Reconstruction with {4D} {Gaussian} Splatting Based on Thermodynamics}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {691--700} }
OmniSplat: Taming Feed-Forward 3D Gaussian Splatting for Omnidirectional Images with Editable Capabilities: Suyoung Lee,

Jaeyoung Chung,

Kihoon Kim,

Jaeyoo Huh,

Gunhee Lee,

Minsoo Lee,

Kyoung Mu Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR, author = {Lee, Suyoung and Chung, Jaeyoung and Kim, Kihoon and Huh, Jaeyoo and Lee, Gunhee and Lee, Minsoo and Lee, Kyoung Mu}, title = {{OmniSplat}: Taming Feed-Forward {3D} {Gaussian} Splatting for Omnidirectional Images with Editable Capabilities}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16356--16365} }
VideoWorld: Exploring Knowledge Learning from Unlabeled Videos: Zhongwei Ren,

Yunchao Wei,

Xun Guo,

Yao Zhao,

Bingyi Kang,

Jiashi Feng,

Xiaojie Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_CVPR, author = {Ren, Zhongwei and Wei, Yunchao and Guo, Xun and Zhao, Yao and Kang, Bingyi and Feng, Jiashi and Jin, Xiaojie}, title = {{VideoWorld}: Exploring Knowledge Learning from Unlabeled Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29029--29039} }
FSHNet: Fully Sparse Hybrid Network for 3D Object Detection: Shuai Liu,

Mingyue Cui,

Boyang Li,

Quanmin Liang,

Tinghe Hong,

Kai Huang,

Yunxiao Shan,

Kai Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Shuai and Cui, Mingyue and Li, Boyang and Liang, Quanmin and Hong, Tinghe and Huang, Kai and Shan, Yunxiao and Huang, Kai}, title = {{FSHNet}: Fully Sparse Hybrid Network for {3D} Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8900--8909} }
3D-SLNR: A Super Lightweight Neural Representation for Large-scale 3D Mapping: Chenhui Shi,

Fulin Tang,

Ning An,

Yihong Wu; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2025_CVPR, author = {Shi, Chenhui and Tang, Fulin and An, Ning and Wu, Yihong}, title = {{3D-SLNR}: A Super Lightweight Neural Representation for Large-scale {3D} Mapping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27233--27242} }
UniVAD: A Training-free Unified Model for Few-shot Visual Anomaly Detection: Zhaopeng Gu,

Bingke Zhu,

Guibo Zhu,

Yingying Chen,

Ming Tang,

Jinqiao Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Gu_2025_CVPR,
  author    = {Gu, Zhaopeng and Zhu, Bingke and Zhu, Guibo and Chen, Yingying and Tang, Ming and Wang, Jinqiao},
  title     = {{UniVAD}: A Training-free Unified Model for Few-shot Visual Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15194--15203},
}
STINR: Deciphering Spatial Transcriptomics via Implicit Neural Representation: Yisi Luo,

Xile Zhao,

Kai Ye,

Deyu Meng; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Yisi and Zhao, Xile and Ye, Kai and Meng, Deyu},
  title     = {{STINR}: Deciphering Spatial Transcriptomics via Implicit Neural Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25930--25939},
}
Remote Photoplethysmography in Real-World and Extreme Lighting Scenarios: Hang Shao,

Lei Luo,

Jianjun Qian,

Mengkai Yan,

Shuo Chen,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2025_CVPR,
  author    = {Shao, Hang and Luo, Lei and Qian, Jianjun and Yan, Mengkai and Chen, Shuo and Yang, Jian},
  title     = {Remote Photoplethysmography in Real-World and Extreme Lighting Scenarios},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10858--10867},
}
Multi-Modal Contrastive Masked Autoencoders: A Two-Stage Progressive Pre-training Approach for RGBD Datasets: Muhammad Abdullah Jamal,

Omid Mohareri; [pdf] [supp]
[bibtex]
@InProceedings{Jamal_2025_CVPR,
  author    = {Jamal, Muhammad Abdullah and Mohareri, Omid},
  title     = {Multi-Modal Contrastive Masked Autoencoders: A Two-Stage Progressive Pre-training Approach for {RGBD} Datasets},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17947--17957},
}
JTD-UAV: MLLM-Enhanced Joint Tracking and Description Framework for Anti-UAV Systems: Yifan Wang,

Jian Zhao,

Zhaoxin Fan,

Xin Zhang,

Xuecheng Wu,

Yudian Zhang,

Lei Jin,

Xinyue Li,

Gang Wang,

Mengxi Jia,

Ping Hu,

Zheng Zhu,

Xuelong Li; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yifan and Zhao, Jian and Fan, Zhaoxin and Zhang, Xin and Wu, Xuecheng and Zhang, Yudian and Jin, Lei and Li, Xinyue and Wang, Gang and Jia, Mengxi and Hu, Ping and Zhu, Zheng and Li, Xuelong},
  title     = {{JTD-UAV}: {MLLM}-Enhanced Joint Tracking and Description Framework for Anti-{UAV} Systems},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1633--1644},
}
HaWoR: World-Space Hand Motion Reconstruction from Egocentric Videos: Jinglei Zhang,

Jiankang Deng,

Chao Ma,

Rolandos Alexandros Potamias; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jinglei and Deng, Jiankang and Ma, Chao and Potamias, Rolandos Alexandros},
  title     = {{HaWoR}: World-Space Hand Motion Reconstruction from Egocentric Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1805--1815},
}
Font-Agent: Enhancing Font Understanding with Large Language Models: Yingxin Lai,

Cuijie Xu,

Haitian Shi,

Guoqing Yang,

Xiaoning Li,

Zhiming Luo,

Shaozi Li; [pdf]
[bibtex]
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Yingxin and Xu, Cuijie and Shi, Haitian and Yang, Guoqing and Li, Xiaoning and Luo, Zhiming and Li, Shaozi},
  title     = {{Font-Agent}: Enhancing Font Understanding with Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19670--19680},
}
Secret Lies in Color: Enhancing AI-Generated Images Detection with Color Distribution Analysis: Zexi Jia,

Chuanwei Huang,

Yeshuang Zhu,

Hongyan Fei,

Xiaoyue Duan,

Zhiqiang Yuan,

Ying Deng,

Jiapei Zhang,

Jinchao Zhang,

Jie Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Jia_2025_CVPR,
  author    = {Jia, Zexi and Huang, Chuanwei and Zhu, Yeshuang and Fei, Hongyan and Duan, Xiaoyue and Yuan, Zhiqiang and Deng, Ying and Zhang, Jiapei and Zhang, Jinchao and Zhou, Jie},
  title     = {Secret Lies in Color: Enhancing {AI}-Generated Images Detection with Color Distribution Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13445--13454},
}
RADIOv2.5: Improved Baselines for Agglomerative Vision Foundation Models: Greg Heinrich,

Mike Ranzinger,

Hongxu Yin,

Yao Lu,

Jan Kautz,

Andrew Tao,

Bryan Catanzaro,

Pavlo Molchanov; [pdf] [supp]
[bibtex]
@InProceedings{Heinrich_2025_CVPR,
  author    = {Heinrich, Greg and Ranzinger, Mike and Yin, Hongxu and Lu, Yao and Kautz, Jan and Tao, Andrew and Catanzaro, Bryan and Molchanov, Pavlo},
  title     = {{RADIOv2.5}: Improved Baselines for Agglomerative Vision Foundation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22487--22497},
}
High Temporal Consistency through Semantic Similarity Propagation in Semi-Supervised Video Semantic Segmentation for Autonomous Flight: Cédric Vincent,

Taehyoung Kim,

Henri Meeß; [pdf] [supp]
[bibtex]
@InProceedings{Vincent_2025_CVPR,
  author    = {Vincent, C{\'e}dric and Kim, Taehyoung and Mee{\ss}, Henri},
  title     = {High Temporal Consistency through Semantic Similarity Propagation in Semi-Supervised Video Semantic Segmentation for Autonomous Flight},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1461--1471},
}
Cross-Modal and Uncertainty-Aware Agglomeration for Open-Vocabulary 3D Scene Understanding: Jinlong Li,

Cristiano Saltori,

Fabio Poiesi,

Nicu Sebe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jinlong and Saltori, Cristiano and Poiesi, Fabio and Sebe, Nicu},
  title     = {Cross-Modal and Uncertainty-Aware Agglomeration for Open-Vocabulary {3D} Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19390--19400},
}
Generative Gaussian Splatting for Unbounded 3D City Generation: Haozhe Xie,

Zhaoxi Chen,

Fangzhou Hong,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Haozhe and Chen, Zhaoxi and Hong, Fangzhou and Liu, Ziwei},
  title     = {Generative {Gaussian} Splatting for Unbounded {3D} City Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6111--6120},
}
SVLTA: Benchmarking Vision-Language Temporal Alignment via Synthetic Video Situation: Hao Du,

Bo Wu,

Yan Lu,

Zhendong Mao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2025_CVPR,
  author    = {Du, Hao and Wu, Bo and Lu, Yan and Mao, Zhendong},
  title     = {{SVLTA}: Benchmarking Vision-Language Temporal Alignment via Synthetic Video Situation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13798--13809},
}
Mixture of Submodules for Domain Adaptive Person Search: Minsu Kim,

Seungryong Kim,

Kwanghoon Sohn; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Minsu and Kim, Seungryong and Sohn, Kwanghoon},
  title     = {Mixture of Submodules for Domain Adaptive Person Search},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13990--14001},
}
Unsupervised Discovery of Facial Landmarks and Head Pose: Satyajit Tourani,

Siddharth Tourani,

Arif Mahmood,

Muhammad Haris Khan; [pdf] [supp]
[bibtex]
@InProceedings{Tourani_2025_CVPR,
  author    = {Tourani, Satyajit and Tourani, Siddharth and Mahmood, Arif and Khan, Muhammad Haris},
  title     = {Unsupervised Discovery of Facial Landmarks and Head Pose},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21192--21202},
}
Instruct-CLIP: Improving Instruction-Guided Image Editing with Automated Data Refinement Using Contrastive Learning: Sherry X. Chen,

Misha Sra,

Pradeep Sen; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Sherry X. and Sra, Misha and Sen, Pradeep},
  title     = {{Instruct-CLIP}: Improving Instruction-Guided Image Editing with Automated Data Refinement Using Contrastive Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28513--28522},
}
Stabilizing and Accelerating Autofocus with Expert Trajectory Regularized Deep Reinforcement Learning: Shouhang Zhu,

Chenglin Li,

Yuankun Jiang,

Li Wei,

Nuowen Kan,

Ziyang Zheng,

Wenrui Dai,

Junni Zou,

Hongkai Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Shouhang and Li, Chenglin and Jiang, Yuankun and Wei, Li and Kan, Nuowen and Zheng, Ziyang and Dai, Wenrui and Zou, Junni and Xiong, Hongkai},
  title     = {Stabilizing and Accelerating Autofocus with Expert Trajectory Regularized Deep Reinforcement Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26440--26450},
}
SharpDepth: Sharpening Metric Depth Predictions Using Diffusion Distillation: Duc-Hai Pham,

Tung Do,

Phong Nguyen,

Binh-Son Hua,

Khoi Nguyen,

Rang Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pham_2025_CVPR,
  author    = {Pham, Duc-Hai and Do, Tung and Nguyen, Phong and Hua, Binh-Son and Nguyen, Khoi and Nguyen, Rang},
  title     = {{SharpDepth}: Sharpening Metric Depth Predictions Using Diffusion Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17060--17069},
}
Repurposing Stable Diffusion Attention for Training-Free Unsupervised Interactive Segmentation: Markus Karmann,

Onay Urfalioglu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karmann_2025_CVPR,
  author    = {Karmann, Markus and Urfalioglu, Onay},
  title     = {Repurposing {Stable Diffusion} Attention for Training-Free Unsupervised Interactive Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24518--24528},
}
GO-N3RDet: Geometry Optimized NeRF-enhanced 3D Object Detector: Zechuan Li,

Hongshan Yu,

Yihao Ding,

Jinhao Qiao,

Basim Azam,

Naveed Akhtar; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zechuan and Yu, Hongshan and Ding, Yihao and Qiao, Jinhao and Azam, Basim and Akhtar, Naveed},
  title     = {{GO-N3RDet}: Geometry Optimized {NeRF}-enhanced {3D} Object Detector},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27211--27221},
}
DPSeg: Dual-Prompt Cost Volume Learning for Open-Vocabulary Semantic Segmentation: Ziyu Zhao,

Xiaoguang Li,

Lingjia Shi,

Nasrin Imanpour,

Song Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Ziyu and Li, Xiaoguang and Shi, Lingjia and Imanpour, Nasrin and Wang, Song},
  title     = {{DPSeg}: Dual-Prompt Cost Volume Learning for Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25346--25356},
}
EvEnhancer: Empowering Effectiveness, Efficiency and Generalizability for Continuous Space-Time Video Super-Resolution with Events: Shuoyan Wei,

Feng Li,

Shengeng Tang,

Yao Zhao,

Huihui Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Shuoyan and Li, Feng and Tang, Shengeng and Zhao, Yao and Bai, Huihui},
  title     = {{EvEnhancer}: Empowering Effectiveness, Efficiency and Generalizability for Continuous Space-Time Video Super-Resolution with Events},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17755--17766},
}
Seeing A 3D World in A Grain of Sand: Yufan Zhang,

Yu Ji,

Yu Guo,

Jinwei Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yufan and Ji, Yu and Guo, Yu and Ye, Jinwei},
  title     = {Seeing A {3D} World in A Grain of Sand},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11187--11196},
}
Simulator HC: Regression-based Online Simulation of Starting Problem-Solution Pairs for Homotopy Continuation in Geometric Vision: Xinyue Zhang,

Zijia Dai,

Wanting Xu,

Laurent Kneip; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xinyue and Dai, Zijia and Xu, Wanting and Kneip, Laurent},
  title     = {Simulator {HC}: Regression-based Online Simulation of Starting Problem-Solution Pairs for Homotopy Continuation in Geometric Vision},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27103--27112},
}
Dynamic Integration of Task-Specific Adapters for Class Incremental Learning: Jiashuo Li,

Shaokun Wang,

Bo Qian,

Yuhang He,

Xing Wei,

Qiang Wang,

Yihong Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jiashuo and Wang, Shaokun and Qian, Bo and He, Yuhang and Wei, Xing and Wang, Qiang and Gong, Yihong},
  title     = {Dynamic Integration of Task-Specific Adapters for Class Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30545--30555},
}
MoFlow: One-Step Flow Matching for Human Trajectory Forecasting via Implicit Maximum Likelihood Estimation based Distillation: Yuxiang Fu,

Qi Yan,

Lele Wang,

Ke Li,

Renjie Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Yuxiang and Yan, Qi and Wang, Lele and Li, Ke and Liao, Renjie},
  title     = {{MoFlow}: One-Step Flow Matching for Human Trajectory Forecasting via Implicit Maximum Likelihood Estimation based Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17282--17293},
}
EgoPressure: A Dataset for Hand Pressure and Pose Estimation in Egocentric Vision: Yiming Zhao,

Taein Kwon,

Paul Streli,

Marc Pollefeys,

Christian Holz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Yiming and Kwon, Taein and Streli, Paul and Pollefeys, Marc and Holz, Christian},
  title     = {{EgoPressure}: A Dataset for Hand Pressure and Pose Estimation in Egocentric Vision},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27727--27738},
}
DiverseFlow: Sample-Efficient Diverse Mode Coverage in Flows: Mashrur M. Morshed,

Vishnu Boddeti; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Morshed_2025_CVPR,
  author    = {Morshed, Mashrur M. and Boddeti, Vishnu},
  title     = {{DiverseFlow}: Sample-Efficient Diverse Mode Coverage in Flows},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23303--23312},
}
Reason-before-Retrieve: One-Stage Reflective Chain-of-Thoughts for Training-Free Zero-Shot Composed Image Retrieval: Yuanmin Tang,

Jue Zhang,

Xiaoting Qin,

Jing Yu,

Gaopeng Gou,

Gang Xiong,

Qingwei Lin,

Saravan Rajmohan,

Dongmei Zhang,

Qi Wu; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Yuanmin and Zhang, Jue and Qin, Xiaoting and Yu, Jing and Gou, Gaopeng and Xiong, Gang and Lin, Qingwei and Rajmohan, Saravan and Zhang, Dongmei and Wu, Qi},
  title     = {Reason-before-Retrieve: One-Stage Reflective Chain-of-Thoughts for Training-Free Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14400--14410},
}
UniGraspTransformer: Simplified Policy Distillation for Scalable Dexterous Robotic Grasping: Wenbo Wang,

Fangyun Wei,

Lei Zhou,

Xi Chen,

Lin Luo,

Xiaohan Yi,

Yizhong Zhang,

Yaobo Liang,

Chang Xu,

Yan Lu,

Jiaolong Yang,

Baining Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Wenbo and Wei, Fangyun and Zhou, Lei and Chen, Xi and Luo, Lin and Yi, Xiaohan and Zhang, Yizhong and Liang, Yaobo and Xu, Chang and Lu, Yan and Yang, Jiaolong and Guo, Baining},
  title     = {{UniGraspTransformer}: Simplified Policy Distillation for Scalable Dexterous Robotic Grasping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12199--12208},
}
GeoMM: On Geodesic Perspective for Multi-modal Learning: Shibin Mei,

Hang Wang,

Bingbing Ni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mei_2025_CVPR,
  author    = {Mei, Shibin and Wang, Hang and Ni, Bingbing},
  title     = {{GeoMM}: On Geodesic Perspective for Multi-modal Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4776--4786},
}
VISCO: Benchmarking Fine-Grained Critique and Correction Towards Self-Improvement in Visual Reasoning: Xueqing Wu,

Yuheng Ding,

Bingxuan Li,

Pan Lu,

Da Yin,

Kai-Wei Chang,

Nanyun Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Xueqing and Ding, Yuheng and Li, Bingxuan and Lu, Pan and Yin, Da and Chang, Kai-Wei and Peng, Nanyun},
  title     = {{VISCO}: Benchmarking Fine-Grained Critique and Correction Towards Self-Improvement in Visual Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9527--9537},
}
MaskGWM: A Generalizable Driving World Model with Video Mask Reconstruction: Jingcheng Ni,

Yuxin Guo,

Yichen Liu,

Rui Chen,

Lewei Lu,

Zehuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2025_CVPR,
  author    = {Ni, Jingcheng and Guo, Yuxin and Liu, Yichen and Chen, Rui and Lu, Lewei and Wu, Zehuan},
  title     = {{MaskGWM}: A Generalizable Driving World Model with Video Mask Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22381--22391},
}
3D-MVP: 3D Multiview Pretraining for Manipulation: Shengyi Qian,

Kaichun Mo,

Valts Blukis,

David F. Fouhey,

Dieter Fox,

Ankit Goyal; [pdf]
[bibtex]
@InProceedings{Qian_2025_CVPR,
  author    = {Qian, Shengyi and Mo, Kaichun and Blukis, Valts and Fouhey, David F. and Fox, Dieter and Goyal, Ankit},
  title     = {{3D-MVP}: {3D} Multiview Pretraining for Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22530--22539},
}
Enhanced OoD Detection through Cross-Modal Alignment of Multi-Modal Representations: Jeonghyeon Kim,

Sangheum Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Jeonghyeon and Hwang, Sangheum},
  title     = {Enhanced {OoD} Detection through Cross-Modal Alignment of Multi-Modal Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29979--29988},
}
Adaptive Dropout: Unleashing Dropout across Layers for Generalizable Image Super-Resolution: Hang Xu,

Jie Huang,

Wei Yu,

Jiangtong Tan,

Zhen Zou,

Feng Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Hang and Huang, Jie and Yu, Wei and Tan, Jiangtong and Zou, Zhen and Zhao, Feng},
  title     = {Adaptive Dropout: Unleashing Dropout across Layers for Generalizable Image Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7513--7523},
}
Breaking the Memory Barrier of Contrastive Loss via Tile-Based Strategy: Zesen Cheng,

Hang Zhang,

Kehan Li,

Sicong Leng,

Zhiqiang Hu,

Fei Wu,

Deli Zhao,

Xin Li,

Lidong Bing; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Zesen and Zhang, Hang and Li, Kehan and Leng, Sicong and Hu, Zhiqiang and Wu, Fei and Zhao, Deli and Li, Xin and Bing, Lidong},
  title     = {Breaking the Memory Barrier of Contrastive Loss via Tile-Based Strategy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10036--10045},
}
Mimir: Improving Video Diffusion Models for Precise Text Understanding: Shuai Tan,

Biao Gong,

Yutong Feng,

Kecheng Zheng,

Dandan Zheng,

Shuwei Shi,

Yujun Shen,

Jingdong Chen,

Ming Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2025_CVPR,
  author    = {Tan, Shuai and Gong, Biao and Feng, Yutong and Zheng, Kecheng and Zheng, Dandan and Shi, Shuwei and Shen, Yujun and Chen, Jingdong and Yang, Ming},
  title     = {Mimir: Improving Video Diffusion Models for Precise Text Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23978--23988},
}
UCM-VeID V2: A Richer Dataset and A Pre-training Method for UAV Cross-Modality Vehicle Re-Identification: Xingyue Liu,

Jiahao Qi,

Chen Chen,

KangCheng Bin,

Ping Zhong; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xingyue and Qi, Jiahao and Chen, Chen and Bin, KangCheng and Zhong, Ping},
  title     = {{UCM-VeID V2}: A Richer Dataset and A Pre-training Method for {UAV} Cross-Modality Vehicle Re-Identification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22286--22295},
}
Learning Phase Distortion with Selective State Space Models for Video Turbulence Mitigation: Xingguang Zhang,

Nicholas Chimitt,

Xijun Wang,

Yu Yuan,

Stanley H. Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xingguang and Chimitt, Nicholas and Wang, Xijun and Yuan, Yu and Chan, Stanley H.},
  title     = {Learning Phase Distortion with Selective State Space Models for Video Turbulence Mitigation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2127--2138},
}
RoboPEPP: Vision-Based Robot Pose and Joint Angle Estimation through Embedding Predictive Pre-Training: Raktim Gautam Goswami,

Prashanth Krishnamurthy,

Yann LeCun,

Farshad Khorrami; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Goswami_2025_CVPR,
  author    = {Goswami, Raktim Gautam and Krishnamurthy, Prashanth and LeCun, Yann and Khorrami, Farshad},
  title     = {{RoboPEPP}: Vision-Based Robot Pose and Joint Angle Estimation through Embedding Predictive Pre-Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6930--6939},
}
Distraction is All You Need for Multimodal Large Language Model Jailbreaking: Zuopeng Yang,

Jiluan Fan,

Anli Yan,

Erdun Gao,

Xin Lin,

Tao Li,

Kanghua Mo,

Changyu Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Zuopeng and Fan, Jiluan and Yan, Anli and Gao, Erdun and Lin, Xin and Li, Tao and Mo, Kanghua and Dong, Changyu},
  title     = {Distraction is All You Need for Multimodal Large Language Model Jailbreaking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9467--9476},
}
Apollo: An Exploration of Video Understanding in Large Multimodal Models: Orr Zohar,

Xiaohan Wang,

Yann Dubois,

Nikhil Mehta,

Tong Xiao,

Philippe Hansen-Estruch,

Licheng Yu,

Xiaofang Wang,

Felix Juefei-Xu,

Ning Zhang,

Serena Yeung-Levy,

Xide Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zohar_2025_CVPR,
  author    = {Zohar, Orr and Wang, Xiaohan and Dubois, Yann and Mehta, Nikhil and Xiao, Tong and Hansen-Estruch, Philippe and Yu, Licheng and Wang, Xiaofang and Juefei-Xu, Felix and Zhang, Ning and Yeung-Levy, Serena and Xia, Xide},
  title     = {Apollo: An Exploration of Video Understanding in Large Multimodal Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18891--18901},
}
Skip Tuning: Pre-trained Vision-Language Models are Effective and Efficient Adapters Themselves: Shihan Wu,

Ji Zhang,

Pengpeng Zeng,

Lianli Gao,

Jingkuan Song,

Heng Tao Shen; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Shihan and Zhang, Ji and Zeng, Pengpeng and Gao, Lianli and Song, Jingkuan and Shen, Heng Tao},
  title     = {Skip Tuning: Pre-trained Vision-Language Models are Effective and Efficient Adapters Themselves},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14723--14732},
}
PatchDPO: Patch-level DPO for Finetuning-free Personalized Image Generation: Qihan Huang,

Long Chan,

Jinlong Liu,

Wanggui He,

Hao Jiang,

Mingli Song,

Jie Song; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Qihan and Chan, Long and Liu, Jinlong and He, Wanggui and Jiang, Hao and Song, Mingli and Song, Jie},
  title     = {{PatchDPO}: Patch-level {DPO} for Finetuning-free Personalized Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18369--18378},
}
Learning to Normalize on the SPD Manifold under Bures-Wasserstein Geometry: Rui Wang,

Shaocheng Jin,

Ziheng Chen,

Xiaoqing Luo,

Xiao-Jun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Rui and Jin, Shaocheng and Chen, Ziheng and Luo, Xiaoqing and Wu, Xiao-Jun},
  title     = {Learning to Normalize on the {SPD} Manifold under {Bures-Wasserstein} Geometry},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8289--8298},
}
SAMWISE: Infusing Wisdom in SAM2 for Text-Driven Video Segmentation: Claudia Cuttano,

Gabriele Trivigno,

Gabriele Rosi,

Carlo Masone,

Giuseppe Averta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cuttano_2025_CVPR,
  author    = {Cuttano, Claudia and Trivigno, Gabriele and Rosi, Gabriele and Masone, Carlo and Averta, Giuseppe},
  title     = {{SAMWISE}: Infusing Wisdom in {SAM2} for Text-Driven Video Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3395--3405},
}
MegaSaM: Accurate, Fast and Robust Structure and Motion from Casual Dynamic Videos: Zhengqi Li,

Richard Tucker,

Forrester Cole,

Qianqian Wang,

Linyi Jin,

Vickie Ye,

Angjoo Kanazawa,

Aleksander Holynski,

Noah Snavely; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zhengqi and Tucker, Richard and Cole, Forrester and Wang, Qianqian and Jin, Linyi and Ye, Vickie and Kanazawa, Angjoo and Holynski, Aleksander and Snavely, Noah},
  title     = {{MegaSaM}: Accurate, Fast and Robust Structure and Motion from Casual Dynamic Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10486--10496},
}
BEVDiffuser: Plug-and-Play Diffusion Model for BEV Denoising with Ground-Truth Guidance: Xin Ye,

Burhaneddin Yaman,

Sheng Cheng,

Feng Tao,

Abhirup Mallik,

Liu Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Xin and Yaman, Burhaneddin and Cheng, Sheng and Tao, Feng and Mallik, Abhirup and Ren, Liu},
  title     = {{BEVDiffuser}: Plug-and-Play Diffusion Model for {BEV} Denoising with Ground-Truth Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1495--1504},
}
GeoAvatar: Geometrically-Consistent Multi-Person Avatar Reconstruction from Sparse Multi-View Videos: Soohyun Lee,

Seoyeon Kim,

HeeKyung Lee,

Won-Sik Jeong,

Joo Ho Lee; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Soohyun and Kim, Seoyeon and Lee, HeeKyung and Jeong, Won-Sik and Lee, Joo Ho},
  title     = {{GeoAvatar}: Geometrically-Consistent Multi-Person Avatar Reconstruction from Sparse Multi-View Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21138--21147},
}
FinePhys: Fine-grained Human Action Generation by Explicitly Incorporating Physical Laws for Effective Skeletal Guidance: Dian Shao,

Mingfei Shi,

Shengda Xu,

Haodong Chen,

Yongle Huang,

Binglu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2025_CVPR,
  author    = {Shao, Dian and Shi, Mingfei and Xu, Shengda and Chen, Haodong and Huang, Yongle and Wang, Binglu},
  title     = {{FinePhys}: Fine-grained Human Action Generation by Explicitly Incorporating Physical Laws for Effective Skeletal Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1905--1916},
}
DiET-GS: Diffusion Prior and Event Stream-Assisted Motion Deblurring 3D Gaussian Splatting: Seungjun Lee,

Gim Hee Lee; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Seungjun and Lee, Gim Hee},
  title     = {{DiET-GS}: Diffusion Prior and Event Stream-Assisted Motion Deblurring {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21739--21749},
}
Speedy-Splat: Fast 3D Gaussian Splatting with Sparse Pixels and Sparse Primitives: Alex Hanson,

Allen Tu,

Geng Lin,

Vasu Singla,

Matthias Zwicker,

Tom Goldstein; [pdf] [supp]
[bibtex]
@InProceedings{Hanson_2025_CVPR,
  author    = {Hanson, Alex and Tu, Allen and Lin, Geng and Singla, Vasu and Zwicker, Matthias and Goldstein, Tom},
  title     = {{Speedy-Splat}: Fast {3D} {Gaussian} Splatting with Sparse Pixels and Sparse Primitives},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21537--21546},
}
SeedVR: Seeding Infinity in Diffusion Transformer Towards Generic Video Restoration: Jianyi Wang,

Zhijie Lin,

Meng Wei,

Yang Zhao,

Ceyuan Yang,

Chen Change Loy,

Lu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jianyi and Lin, Zhijie and Wei, Meng and Zhao, Yang and Yang, Ceyuan and Loy, Chen Change and Jiang, Lu},
  title     = {{SeedVR}: Seeding Infinity in Diffusion Transformer Towards Generic Video Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2161--2172},
}
Beyond Generation: A Diffusion-based Low-level Feature Extractor for Detecting AI-generated Images: Nan Zhong,

Haoyu Chen,

Yiran Xu,

Zhenxing Qian,

Xinpeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhong_2025_CVPR,
  author    = {Zhong, Nan and Chen, Haoyu and Xu, Yiran and Qian, Zhenxing and Zhang, Xinpeng},
  title     = {Beyond Generation: A Diffusion-based Low-level Feature Extractor for Detecting {AI}-generated Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8258--8268},
}
Robust-MVTON: Learning Cross-Pose Feature Alignment and Fusion for Robust Multi-View Virtual Try-On: Nannan Zhang,

Yijiang Li,

Dong Du,

Zheng Chong,

Zhengwentai Sun,

Jianhao Zeng,

Yusheng Dai,

Zhengyu Xie,

Hairui Zhu,

Xiaoguang Han; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Nannan and Li, Yijiang and Du, Dong and Chong, Zheng and Sun, Zhengwentai and Zeng, Jianhao and Dai, Yusheng and Xie, Zhengyu and Zhu, Hairui and Han, Xiaoguang},
  title     = {{Robust-MVTON}: Learning Cross-Pose Feature Alignment and Fusion for Robust Multi-View Virtual Try-On},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16029--16039},
}
Omnia de EgoTempo: Benchmarking Temporal Understanding of Multi-Modal LLMs in Egocentric Videos: Chiara Plizzari,

Alessio Tonioni,

Yongqin Xian,

Achin Kulshrestha,

Federico Tombari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Plizzari_2025_CVPR,
  author    = {Plizzari, Chiara and Tonioni, Alessio and Xian, Yongqin and Kulshrestha, Achin and Tombari, Federico},
  title     = {Omnia de {EgoTempo}: Benchmarking Temporal Understanding of Multi-Modal {LLMs} in Egocentric Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24129--24138},
}
ODHSR: Online Dense 3D Reconstruction of Humans and Scenes from Monocular Videos: Zetong Zhang,

Manuel Kaufmann,

Lixin Xue,

Jie Song,

Martin R. Oswald; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zetong and Kaufmann, Manuel and Xue, Lixin and Song, Jie and Oswald, Martin R.},
  title     = {{ODHSR}: Online Dense {3D} Reconstruction of Humans and Scenes from Monocular Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21824--21835},
}
Identity-Preserving Text-to-Video Generation by Frequency Decomposition: Shenghai Yuan,

Jinfa Huang,

Xianyi He,

Yunyang Ge,

Yujun Shi,

Liuhan Chen,

Jiebo Luo,

Li Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Shenghai and Huang, Jinfa and He, Xianyi and Ge, Yunyang and Shi, Yujun and Chen, Liuhan and Luo, Jiebo and Yuan, Li},
  title     = {Identity-Preserving Text-to-Video Generation by Frequency Decomposition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12978--12988},
}
FreeGave: 3D Physics Learning from Dynamic Videos by Gaussian Velocity: Jinxi Li,

Ziyang Song,

Siyuan Zhou,

Bo Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jinxi and Song, Ziyang and Zhou, Siyuan and Yang, Bo},
  title     = {{FreeGave}: {3D} Physics Learning from Dynamic Videos by {Gaussian} Velocity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12433--12443},
}
SpiritSight Agent: Advanced GUI Agent with One Look: Zhiyuan Huang,

Ziming Cheng,

Junting Pan,

Zhaohui Hou,

Mingjie Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zhiyuan and Cheng, Ziming and Pan, Junting and Hou, Zhaohui and Zhan, Mingjie},
  title     = {{SpiritSight} Agent: Advanced {GUI} Agent with One Look},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29490--29500},
}
Zero-Shot Monocular Scene Flow Estimation in the Wild: Yiqing Liang,

Abhishek Badki,

Hang Su,

James Tompkin,

Orazio Gallo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Yiqing and Badki, Abhishek and Su, Hang and Tompkin, James and Gallo, Orazio},
  title     = {Zero-Shot Monocular Scene Flow Estimation in the Wild},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21031--21044},
}
MG-MotionLLM: A Unified Framework for Motion Comprehension and Generation across Multiple Granularities: Bizhu Wu,

Jinheng Xie,

Keming Shen,

Zhe Kong,

Jianfeng Ren,

Ruibin Bai,

Rong Qu,

Linlin Shen; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Bizhu and Xie, Jinheng and Shen, Keming and Kong, Zhe and Ren, Jianfeng and Bai, Ruibin and Qu, Rong and Shen, Linlin},
  title     = {{MG-MotionLLM}: A Unified Framework for Motion Comprehension and Generation across Multiple Granularities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27849--27858},
}
Retaining Knowledge and Enhancing Long-Text Representations in CLIP through Dual-Teacher Distillation: Yuheng Feng,

Changsong Wen,

Zelin Peng,

Li jiaye,

Siyu Zhu; [pdf]
[bibtex]
@comment{NOTE(review): the fourth author is listed as "Li jiaye" and exported as "jiaye, Li"; the family and given names may be swapped and the capitalisation lost. Confirm against the paper before changing.}
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Yuheng and Wen, Changsong and Peng, Zelin and jiaye, Li and Zhu, Siyu},
  title     = {Retaining Knowledge and Enhancing Long-Text Representations in {CLIP} through Dual-Teacher Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24895--24904},
}
MMRL: Multi-Modal Representation Learning for Vision-Language Models: Yuncheng Guo,

Xiaodong Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Yuncheng and Gu, Xiaodong},
  title     = {{MMRL}: Multi-Modal Representation Learning for Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25015--25025},
}
Optical-Flow Guided Prompt Optimization for Coherent Video Generation: Hyelin Nam,

Jaemin Kim,

Dohun Lee,

Jong Chul Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2025_CVPR,
  author    = {Nam, Hyelin and Kim, Jaemin and Lee, Dohun and Ye, Jong Chul},
  title     = {Optical-Flow Guided Prompt Optimization for Coherent Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7837--7846},
}
MOS: Modeling Object-Scene Associations in Generalized Category Discovery: Zhengyuan Peng,

Jinpeng Ma,

Zhimin Sun,

Ran Yi,

Haichuan Song,

Xin Tan,

Lizhuang Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_CVPR,
  author    = {Peng, Zhengyuan and Ma, Jinpeng and Sun, Zhimin and Yi, Ran and Song, Haichuan and Tan, Xin and Ma, Lizhuang},
  title     = {{MOS}: Modeling Object-Scene Associations in Generalized Category Discovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15118--15128},
}
Anchor-Aware Similarity Cohesion in Target Frames Enables Predicting Temporal Moment Boundaries in 2D: Jiawei Tan,

Hongxing Wang,

Junwu Weng,

Jiaxin Li,

Zhilong Ou,

Kang Dang; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2025_CVPR,
  author    = {Tan, Jiawei and Wang, Hongxing and Weng, Junwu and Li, Jiaxin and Ou, Zhilong and Dang, Kang},
  title     = {Anchor-Aware Similarity Cohesion in Target Frames Enables Predicting Temporal Moment Boundaries in {2D}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24180--24189},
}
Test-time Augmentation Improves Efficiency in Conformal Prediction: Divya Shanmugam,

Helen Lu,

Swami Sankaranarayanan,

John Guttag; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shanmugam_2025_CVPR,
  author    = {Shanmugam, Divya and Lu, Helen and Sankaranarayanan, Swami and Guttag, John},
  title     = {Test-time Augmentation Improves Efficiency in Conformal Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20622--20631},
}
Breaking the Low-Rank Dilemma of Linear Attention: Qihang Fan,

Huaibo Huang,

Ran He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Qihang and Huang, Huaibo and He, Ran},
  title     = {Breaking the Low-Rank Dilemma of Linear Attention},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25271--25280},
}
StoryGPT-V: Large Language Models as Consistent Story Visualizers: Xiaoqian Shen,

Mohamed Elhoseiny; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Xiaoqian and Elhoseiny, Mohamed},
  title     = {{StoryGPT-V}: Large Language Models as Consistent Story Visualizers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13273--13283},
}
Code-as-Monitor: Constraint-aware Visual Programming for Reactive and Proactive Robotic Failure Detection: Enshen Zhou,

Qi Su,

Cheng Chi,

Zhizheng Zhang,

Zhongyuan Wang,

Tiejun Huang,

Lu Sheng,

He Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Enshen and Su, Qi and Chi, Cheng and Zhang, Zhizheng and Wang, Zhongyuan and Huang, Tiejun and Sheng, Lu and Wang, He},
  title     = {{Code-as-Monitor}: Constraint-aware Visual Programming for Reactive and Proactive Robotic Failure Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6919--6929},
}
Embracing Collaboration Over Competition: Condensing Multiple Prompts for Visual In-Context Learning: Jinpeng Wang,

Tianci Luo,

Yaohua Zha,

Yan Feng,

Ruisheng Luo,

Bin Chen,

Tao Dai,

Long Chen,

Yaowei Wang,

Shu-Tao Xia; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jinpeng and Luo, Tianci and Zha, Yaohua and Feng, Yan and Luo, Ruisheng and Chen, Bin and Dai, Tao and Chen, Long and Wang, Yaowei and Xia, Shu-Tao},
  title     = {Embracing Collaboration Over Competition: Condensing Multiple Prompts for Visual In-Context Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25156--25165},
}
Rethinking Reconstruction and Denoising in the Dark: New Perspective, General Architecture and Beyond: Tengyu Ma,

Long Ma,

Ziye Li,

Yuetong Wang,

Jinyuan Liu,

Chengpei Xu,

Risheng Liu; [pdf]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Tengyu and Ma, Long and Li, Ziye and Wang, Yuetong and Liu, Jinyuan and Xu, Chengpei and Liu, Risheng},
  title     = {Rethinking Reconstruction and Denoising in the Dark: New Perspective, General Architecture and Beyond},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2323--2332},
}
Edge-SD-SR: Low Latency and Parameter Efficient On-device Super-Resolution with Stable Diffusion via Bidirectional Conditioning: Isma Hadji,

Mehdi Noroozi,

Victor Escorcia,

Anestis Zaganidis,

Brais Martinez,

Georgios Tzimiropoulos; [pdf]
[bibtex]
@InProceedings{Hadji_2025_CVPR,
  author    = {Hadji, Isma and Noroozi, Mehdi and Escorcia, Victor and Zaganidis, Anestis and Martinez, Brais and Tzimiropoulos, Georgios},
  title     = {{Edge-SD-SR}: Low Latency and Parameter Efficient On-device Super-Resolution with {Stable Diffusion} via Bidirectional Conditioning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12789--12798},
}
Unity in Diversity: Video Editing via Gradient-Latent Purification: Junyu Gao,

Kunlin Yang,

Xuan Yao,

Yufan Hu; [pdf]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Junyu and Yang, Kunlin and Yao, Xuan and Hu, Yufan},
  title     = {Unity in Diversity: Video Editing via Gradient-Latent Purification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23401--23411},
}
Revealing Key Details to See Differences: A Novel Prototypical Perspective for Skeleton-based Action Recognition: Hongda Liu,

Yunfan Liu,

Min Ren,

Hao Wang,

Yunlong Wang,

Zhenan Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Hongda and Liu, Yunfan and Ren, Min and Wang, Hao and Wang, Yunlong and Sun, Zhenan},
  title     = {Revealing Key Details to See Differences: A Novel Prototypical Perspective for Skeleton-based Action Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29248--29257},
}
Finsler Multi-Dimensional Scaling: Manifold Learning for Asymmetric Dimensionality Reduction and Embedding: Thomas Dagès,

Simon Weber,

Ya-Wei Eileen Lin,

Ronen Talmon,

Daniel Cremers,

Michael Lindenbaum,

Alfred M. Bruckstein,

Ron Kimmel; [pdf] [supp]
[bibtex]
@InProceedings{Dages_2025_CVPR,
  author    = {Dag{\`e}s, Thomas and Weber, Simon and Lin, Ya-Wei Eileen and Talmon, Ronen and Cremers, Daniel and Lindenbaum, Michael and Bruckstein, Alfred M. and Kimmel, Ron},
  title     = {{Finsler} Multi-Dimensional Scaling: Manifold Learning for Asymmetric Dimensionality Reduction and Embedding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25842--25853},
}
VideoEspresso: A Large-Scale Chain-of-Thought Dataset for Fine-Grained Video Reasoning via Core Frame Selection: Songhao Han,

Wei Huang,

Hairong Shi,

Le Zhuo,

Xiu Su,

Shifeng Zhang,

Xu Zhou,

Xiaojuan Qi,

Yue Liao,

Si Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Songhao and Huang, Wei and Shi, Hairong and Zhuo, Le and Su, Xiu and Zhang, Shifeng and Zhou, Xu and Qi, Xiaojuan and Liao, Yue and Liu, Si},
  title     = {{VideoEspresso}: A Large-Scale Chain-of-Thought Dataset for Fine-Grained Video Reasoning via Core Frame Selection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26181--26191},
}
INFP: Audio-Driven Interactive Head Generation in Dyadic Conversations: Yongming Zhu,

Longhao Zhang,

Zhengkun Rong,

Tianshu Hu,

Shuang Liang,

Zhipeng Ge; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Yongming and Zhang, Longhao and Rong, Zhengkun and Hu, Tianshu and Liang, Shuang and Ge, Zhipeng},
  title     = {{INFP}: Audio-Driven Interactive Head Generation in Dyadic Conversations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10667--10677},
}
Federated Learning with Domain Shift Eraser: Zheng Wang,

Zihui Wang,

Zheng Wang,

Xiaoliang Fan,

Cheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zheng and Wang, Zihui and Wang, Zheng and Fan, Xiaoliang and Wang, Cheng},
  title     = {Federated Learning with Domain Shift Eraser},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4978--4987},
}
Cross-Modal Distillation for 2D/3D Multi-Object Discovery from 2D Motion: Saad Lahlali,

Sandra Kara,

Hejer Ammar,

Florian Chabot,

Nicolas Granger,

Hervé Le Borgne,

Quoc-Cuong Pham; [pdf] [supp]
[bibtex]
@InProceedings{Lahlali_2025_CVPR,
  author    = {Lahlali, Saad and Kara, Sandra and Ammar, Hejer and Chabot, Florian and Granger, Nicolas and Le Borgne, Herv{\'e} and Pham, Quoc-Cuong},
  title     = {Cross-Modal Distillation for {2D/3D} Multi-Object Discovery from {2D} Motion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24529--24538},
}
DiTCtrl: Exploring Attention Control in Multi-Modal Diffusion Transformer for Tuning-Free Multi-Prompt Longer Video Generation: Minghong Cai,

Xiaodong Cun,

Xiaoyu Li,

Wenze Liu,

Zhaoyang Zhang,

Yong Zhang,

Ying Shan,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Minghong and Cun, Xiaodong and Li, Xiaoyu and Liu, Wenze and Zhang, Zhaoyang and Zhang, Yong and Shan, Ying and Yue, Xiangyu},
  title     = {{DiTCtrl}: Exploring Attention Control in Multi-Modal Diffusion Transformer for Tuning-Free Multi-Prompt Longer Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7763--7772},
}
Bridge Frame and Event: Common Spatiotemporal Fusion for High-Dynamic Scene Optical Flow: Hanyu Zhou,

Haonan Wang,

Haoyue Liu,

Yuxing Duan,

Yi Chang,

Luxin Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Hanyu and Wang, Haonan and Liu, Haoyue and Duan, Yuxing and Chang, Yi and Yan, Luxin},
  title     = {Bridge Frame and Event: Common Spatiotemporal Fusion for High-Dynamic Scene Optical Flow},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27904--27913},
}
EVPGS: Enhanced View Prior Guidance for Splatting-based Extrapolated View Synthesis: Jiahe Li,

Feiyu Wang,

Xiaochao Qu,

Chengjing Wu,

Luoqi Liu,

Ting Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jiahe and Wang, Feiyu and Qu, Xiaochao and Wu, Chengjing and Liu, Luoqi and Liu, Ting},
  title     = {{EVPGS}: Enhanced View Prior Guidance for Splatting-based Extrapolated View Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16398--16407},
}
GREAT: Geometry-Intention Collaborative Inference for Open-Vocabulary 3D Object Affordance Grounding: Yawen Shao,

Wei Zhai,

Yuhang Yang,

Hongchen Luo,

Yang Cao,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2025_CVPR,
  author    = {Shao, Yawen and Zhai, Wei and Yang, Yuhang and Luo, Hongchen and Cao, Yang and Zha, Zheng-Jun},
  title     = {{GREAT}: Geometry-Intention Collaborative Inference for Open-Vocabulary {3D} Object Affordance Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17326--17336},
}
Link to the Past: Temporal Propagation for Fast 3D Human Reconstruction from Monocular Video: Matthew Marchellus,

Nadhira Noor,

In Kyu Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Marchellus_2025_CVPR,
  author    = {Marchellus, Matthew and Noor, Nadhira and Park, In Kyu},
  title     = {Link to the Past: Temporal Propagation for Fast {3D} Human Reconstruction from Monocular Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6190--6199},
}
Inversion Circle Interpolation: Diffusion-based Image Augmentation for Data-scarce Classification: Yanghao Wang,

Long Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yanghao and Chen, Long},
  title     = {Inversion Circle Interpolation: Diffusion-based Image Augmentation for Data-scarce Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25560--25569},
}
Deterministic Certification of Graph Neural Networks against Graph Poisoning Attacks with Arbitrary Perturbations: Jiate Li,

Meng Pang,

Yun Dong,

Binghui Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jiate and Pang, Meng and Dong, Yun and Wang, Binghui},
  title     = {Deterministic Certification of Graph Neural Networks against Graph Poisoning Attacks with Arbitrary Perturbations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5020--5029},
}
A3: Few-shot Prompt Learning of Unlearnable Examples with Cross-Modal Adversarial Feature Alignment: Xuan Wang,

Xitong Gao,

Dongping Liao,

Tianrui Qin,

Yu-liang Lu,

Cheng-zhong Xu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xuan and Gao, Xitong and Liao, Dongping and Qin, Tianrui and Lu, Yu-liang and Xu, Cheng-zhong},
  title     = {A3: Few-shot Prompt Learning of Unlearnable Examples with Cross-Modal Adversarial Feature Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9507--9516},
}
Adapting Pre-trained 3D Models for Point Cloud Video Understanding via Cross-frame Spatio-temporal Perception: Baixuan Lv,

Yaohua Zha,

Tao Dai,

Xue Yuerong,

Ke Chen,

Shu-Tao Xia; [pdf] [supp]
[bibtex]
@comment{NOTE(review): the fourth author is listed as "Xue Yuerong" and exported as "Yuerong, Xue"; the family and given names may be swapped. Confirm against the paper before changing.}
@InProceedings{Lv_2025_CVPR,
  author    = {Lv, Baixuan and Zha, Yaohua and Dai, Tao and Yuerong, Xue and Chen, Ke and Xia, Shu-Tao},
  title     = {Adapting Pre-trained {3D} Models for Point Cloud Video Understanding via Cross-frame Spatio-temporal Perception},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12413--12422},
}
MASH-VLM: Mitigating Action-Scene Hallucination in Video-LLMs through Disentangled Spatial-Temporal Representations: Kyungho Bae,

Jinhyung Kim,

Sihaeng Lee,

Soonyoung Lee,

Gunhee Lee,

Jinwoo Choi; [pdf] [supp]
[bibtex]
@InProceedings{Bae_2025_CVPR,
  author    = {Bae, Kyungho and Kim, Jinhyung and Lee, Sihaeng and Lee, Soonyoung and Lee, Gunhee and Choi, Jinwoo},
  title     = {{MASH-VLM}: Mitigating Action-Scene Hallucination in {Video-LLMs} through Disentangled Spatial-Temporal Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13744--13753},
}
UWAV: Uncertainty-weighted Weakly-supervised Audio-Visual Video Parsing: Yung-Hsuan Lai,

Janek Ebbers,

Yu-Chiang Frank Wang,

François Germain,

Michael Jeffrey Jones,

Moitreya Chatterjee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Yung-Hsuan and Ebbers, Janek and Wang, Yu-Chiang Frank and Germain, Fran{\c{c}}ois and Jones, Michael Jeffrey and Chatterjee, Moitreya},
  title     = {{UWAV}: Uncertainty-weighted Weakly-supervised Audio-Visual Video Parsing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13561--13570},
}
Mosaic of Modalities: A Comprehensive Benchmark for Multimodal Graph Learning: Jing Zhu,

Yuhang Zhou,

Shengyi Qian,

Zhongmou He,

Tong Zhao,

Neil Shah,

Danai Koutra; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Jing and Zhou, Yuhang and Qian, Shengyi and He, Zhongmou and Zhao, Tong and Shah, Neil and Koutra, Danai},
  title     = {Mosaic of Modalities: A Comprehensive Benchmark for Multimodal Graph Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14215--14224},
}
TSP-Mamba: The Travelling Salesman Problem Meets Mamba for Image Super-resolution and Beyond: Kun Zhou,

Xinyu Lin,

Jiangbo Lu; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Kun and Lin, Xinyu and Lu, Jiangbo},
  title     = {{TSP-Mamba}: The Travelling Salesman Problem Meets {Mamba} for Image Super-resolution and Beyond},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28134--28143},
}
MVPaint: Synchronized Multi-View Diffusion for Painting Anything 3D: Wei Cheng,

Juncheng Mu,

Xianfang Zeng,

Xin Chen,

Anqi Pang,

Chi Zhang,

Zhibin Wang,

Bin Fu,

Gang Yu,

Ziwei Liu,

Liang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Wei and Mu, Juncheng and Zeng, Xianfang and Chen, Xin and Pang, Anqi and Zhang, Chi and Wang, Zhibin and Fu, Bin and Yu, Gang and Liu, Ziwei and Pan, Liang},
  title     = {{MVPaint}: Synchronized Multi-View Diffusion for Painting Anything {3D}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {585--594},
}
FirePlace: Geometric Refinements of LLM Common Sense Reasoning for 3D Object Placement: Ian Huang,

Yanan Bao,

Karen Truong,

Howard Zhou,

Cordelia Schmid,

Leonidas Guibas,

Alireza Fathi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Ian and Bao, Yanan and Truong, Karen and Zhou, Howard and Schmid, Cordelia and Guibas, Leonidas and Fathi, Alireza},
  title     = {{FirePlace}: Geometric Refinements of {LLM} Common Sense Reasoning for {3D} Object Placement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13466--13476},
}
RENO: Real-Time Neural Compression for 3D LiDAR Point Clouds: Kang You,

Tong Chen,

Dandan Ding,

M. Salman Asif,

Zhan Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2025_CVPR,
  author    = {You, Kang and Chen, Tong and Ding, Dandan and Asif, M. Salman and Ma, Zhan},
  title     = {{RENO}: Real-Time Neural Compression for {3D} {LiDAR} Point Clouds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22172--22181},
}
End-to-End Implicit Neural Representations for Classification: Alexander Gielisse,

Jan van Gemert; [pdf] [arXiv]
[bibtex]
@InProceedings{Gielisse_2025_CVPR,
  author    = {Gielisse, Alexander and van Gemert, Jan},
  title     = {End-to-End Implicit Neural Representations for Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18728--18737},
}
ASAP: Advancing Semantic Alignment Promotes Multi-Modal Manipulation Detecting and Grounding: Zhenxing Zhang,

Yaxiong Wang,

Lechao Cheng,

Zhun Zhong,

Dan Guo,

Meng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhenxing and Wang, Yaxiong and Cheng, Lechao and Zhong, Zhun and Guo, Dan and Wang, Meng},
  title     = {{ASAP}: Advancing Semantic Alignment Promotes Multi-Modal Manipulation Detecting and Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4005--4014},
}
UNICL-SAM: Uncertainty-Driven In-Context Segmentation with Part Prototype Discovery: Dianmo Sheng,

Dongdong Chen,

Zhentao Tan,

Qiankun Liu,

Qi Chu,

Tao Gong,

Bin Liu,

Jing Han,

Wenbin Tu,

Shengwei Xu,

Nenghai Yu; [pdf] [supp]
[bibtex]
@InProceedings{Sheng_2025_CVPR,
  author    = {Sheng, Dianmo and Chen, Dongdong and Tan, Zhentao and Liu, Qiankun and Chu, Qi and Gong, Tao and Liu, Bin and Han, Jing and Tu, Wenbin and Xu, Shengwei and Yu, Nenghai},
  title     = {{UNICL-SAM}: Uncertainty-Driven In-Context Segmentation with Part Prototype Discovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20201--20211},
}
Layered Motion Fusion: Lifting Motion Segmentation to 3D in Egocentric Videos: Vadim Tschernezki,

Diane Larlus,

Iro Laina,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tschernezki_2025_CVPR,
  author    = {Tschernezki, Vadim and Larlus, Diane and Laina, Iro and Vedaldi, Andrea},
  title     = {Layered Motion Fusion: Lifting Motion Segmentation to {3D} in Egocentric Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17637--17648},
}
FADE: Frequency-Aware Diffusion Model Factorization for Video Editing: Yixuan Zhu,

Haolin Wang,

Shilin Ma,

Wenliang Zhao,

Yansong Tang,

Lei Chen,

Jie Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Yixuan and Wang, Haolin and Ma, Shilin and Zhao, Wenliang and Tang, Yansong and Chen, Lei and Zhou, Jie},
  title     = {{FADE}: Frequency-Aware Diffusion Model Factorization for Video Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28426--28435},
}
MotiF: Making Text Count in Image Animation with Motion Focal Loss: Shijie Wang,

Samaneh Azadi,

Rohit Girdhar,

Saketh Rambhatla,

Chen Sun,

Xi Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shijie and Azadi, Samaneh and Girdhar, Rohit and Rambhatla, Saketh and Sun, Chen and Yin, Xi},
  title     = {{MotiF}: Making Text Count in Image Animation with Motion Focal Loss},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7773--7783},
}
Towards Explicit Geometry-Reflectance Collaboration for Generalized LiDAR Segmentation in Adverse Weather: Longyu Yang,

Ping Hu,

Shangbo Yuan,

Lu Zhang,

Jun Liu,

Hengtao Shen,

Xiaofeng Zhu; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Longyu and Hu, Ping and Yuan, Shangbo and Zhang, Lu and Liu, Jun and Shen, Hengtao and Zhu, Xiaofeng},
  title     = {Towards Explicit Geometry-Reflectance Collaboration for Generalized {LiDAR} Segmentation in Adverse Weather},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {139--149},
}
Data Synthesis with Diverse Styles for Face Recognition via 3DMM-Guided Diffusion: Yuxi Mi,

Zhizhou Zhong,

Yuge Huang,

Qiuyang Yuan,

Xuan Zhao,

Jianqing Xu,

Shouhong Ding,

Shaoming Wang,

Rizen Guo,

Shuigeng Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mi_2025_CVPR,
  author    = {Mi, Yuxi and Zhong, Zhizhou and Huang, Yuge and Yuan, Qiuyang and Zhao, Xuan and Xu, Jianqing and Ding, Shouhong and Wang, Shaoming and Guo, Rizen and Zhou, Shuigeng},
  title     = {Data Synthesis with Diverse Styles for Face Recognition via {3DMM}-Guided Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21203--21214},
}
Diffusion Self-Distillation for Zero-Shot Customized Image Generation: Shengqu Cai,

Eric Ryan Chan,

Yunzhi Zhang,

Leonidas Guibas,

Jiajun Wu,

Gordon Wetzstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Shengqu and Chan, Eric Ryan and Zhang, Yunzhi and Guibas, Leonidas and Wu, Jiajun and Wetzstein, Gordon},
  title     = {Diffusion Self-Distillation for Zero-Shot Customized Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18434--18443},
}
Uncertainty-guided Perturbation for Image Super-Resolution Diffusion Model: Leheng Zhang,

Weiyi You,

Kexuan Shi,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Leheng and You, Weiyi and Shi, Kexuan and Gu, Shuhang},
  title     = {Uncertainty-guided Perturbation for Image Super-Resolution Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17980--17989},
}
Geometric Knowledge-Guided Localized Global Distribution Alignment for Federated Learning: Yanbiao Ma,

Wei Dai,

Wenke Huang,

Jiayi Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Yanbiao and Dai, Wei and Huang, Wenke and Chen, Jiayi},
  title     = {Geometric Knowledge-Guided Localized Global Distribution Alignment for Federated Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20958--20968},
}
Towards Human-Understandable Multi-Dimensional Concept Discovery: Arne Grobrügge,

Niklas Kühl,

Gerhard Satzger,

Philipp Spitzer; [pdf] [supp]
[bibtex]
@InProceedings{Grobrugge_2025_CVPR,
  author    = {Grobr{\"u}gge, Arne and K{\"u}hl, Niklas and Satzger, Gerhard and Spitzer, Philipp},
  title     = {Towards Human-Understandable Multi-Dimensional Concept Discovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20018--20027},
}
GlyphMastero: A Glyph Encoder for High-Fidelity Scene Text Editing: Tong Wang,

Ting Liu,

Xiaochao Qu,

Chengjing Wu,

Luoqi Liu,

Xiaolin Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Tong and Liu, Ting and Qu, Xiaochao and Wu, Chengjing and Liu, Luoqi and Hu, Xiaolin},
  title     = {{GlyphMastero}: A Glyph Encoder for High-Fidelity Scene Text Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28523--28532},
}
ConText-CIR: Learning from Concepts in Text for Composed Image Retrieval: Eric Xing,

Pranavi Kolouju,

Robert Pless,

Abby Stylianou,

Nathan Jacobs; [pdf] [supp]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Eric and Kolouju, Pranavi and Pless, Robert and Stylianou, Abby and Jacobs, Nathan},
  title     = {{ConText-CIR}: Learning from Concepts in Text for Composed Image Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19638--19648},
}
MaskGaussian: Adaptive 3D Gaussian Representation from Probabilistic Masks: Yifei Liu,

Zhihang Zhong,

Yifan Zhan,

Sheng Xu,

Xiao Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yifei and Zhong, Zhihang and Zhan, Yifan and Xu, Sheng and Sun, Xiao},
  title     = {{MaskGaussian}: Adaptive {3D} {Gaussian} Representation from Probabilistic Masks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {681--690},
}
Perturb-and-Revise: Flexible 3D Editing with Generative Trajectories: Susung Hong,

Johanna Karras,

Ricardo Martin-Brualla,

Ira Kemelmacher-Shlizerman; [pdf] [supp]
[bibtex]
@InProceedings{Hong_2025_CVPR,
  author    = {Hong, Susung and Karras, Johanna and Martin-Brualla, Ricardo and Kemelmacher-Shlizerman, Ira},
  title     = {{Perturb-and-Revise}: Flexible {3D} Editing with Generative Trajectories},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16293--16303},
}
Birth and Death of a Rose: Chen Geng,

Yunzhi Zhang,

Shangzhe Wu,

Jiajun Wu; [pdf] [arXiv]
[bibtex]
@InProceedings{Geng_2025_CVPR,
  author    = {Geng, Chen and Zhang, Yunzhi and Wu, Shangzhe and Wu, Jiajun},
  title     = {Birth and Death of a Rose},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26102--26113},
}
Learning Compatible Multi-Prize Subnetworks for Asymmetric Retrieval: Yushuai Sun,

Zikun Zhou,

Dongmei Jiang,

Yaowei Wang,

Jun Yu,

Guangming Lu,

Wenjie Pei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Yushuai and Zhou, Zikun and Jiang, Dongmei and Wang, Yaowei and Yu, Jun and Lu, Guangming and Pei, Wenjie},
  title     = {Learning Compatible Multi-Prize Subnetworks for Asymmetric Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15255--15264},
}
SoundVista: Novel-View Ambient Sound Synthesis via Visual-Acoustic Binding: Mingfei Chen,

Israel D. Gebru,

Ishwarya Ananthabhotla,

Christian Richardt,

Dejan Markovic,

Jake Sandakly,

Steven Krenn,

Todd Keebler,

Eli Shlizerman,

Alexander Richard; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Mingfei and Gebru, Israel D. and Ananthabhotla, Ishwarya and Richardt, Christian and Markovic, Dejan and Sandakly, Jake and Krenn, Steven and Keebler, Todd and Shlizerman, Eli and Richard, Alexander},
  title     = {{SoundVista}: Novel-View Ambient Sound Synthesis via Visual-Acoustic Binding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8331--8341},
}
CLIP Under the Microscope: A Fine-Grained Analysis of Multi-Object Representation: Reza Abbasi,

Ali Nazari,

Aminreza Sefid,

Mohammadali Banayeeanzade,

Mohammad Hossein Rohban,

Mahdieh Soleymani Baghshah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Abbasi_2025_CVPR,
  author    = {Abbasi, Reza and Nazari, Ali and Sefid, Aminreza and Banayeeanzade, Mohammadali and Rohban, Mohammad Hossein and Baghshah, Mahdieh Soleymani},
  title     = {{CLIP} Under the Microscope: A Fine-Grained Analysis of Multi-Object Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9308--9317},
}
MetricGrids: Arbitrary Nonlinear Approximation with Elementary Metric Grids based Implicit Neural Representation: Shu Wang,

Yanbo Gao,

Shuai Li,

Chong Lv,

Xun Cai,

Chuankun Li,

Hui Yuan,

Jinglin Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shu and Gao, Yanbo and Li, Shuai and Lv, Chong and Cai, Xun and Li, Chuankun and Yuan, Hui and Zhang, Jinglin},
  title     = {{MetricGrids}: Arbitrary Nonlinear Approximation with Elementary Metric Grids based Implicit Neural Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21381--21391},
}
Navigating Image Restoration with VAR's Distribution Alignment Prior: Siyang Wang,

Naishan Zheng,

Jie Huang,

Feng Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Siyang and Zheng, Naishan and Huang, Jie and Zhao, Feng},
  title     = {Navigating Image Restoration with {VAR}'s Distribution Alignment Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7559--7569},
}
MovieBench: A Hierarchical Movie Level Dataset for Long Video Generation: Weijia Wu,

Mingyu Liu,

Zeyu Zhu,

Xi Xia,

Haoen Feng,

Wen Wang,

Kevin Qinghong Lin,

Chunhua Shen,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Weijia and Liu, Mingyu and Zhu, Zeyu and Xia, Xi and Feng, Haoen and Wang, Wen and Lin, Kevin Qinghong and Shen, Chunhua and Shou, Mike Zheng},
  title     = {{MovieBench}: A Hierarchical Movie Level Dataset for Long Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28984--28994},
}
Dissecting and Mitigating Diffusion Bias via Mechanistic Interpretability: Yingdong Shi,

Changming Li,

Yifan Wang,

Yongxiang Zhao,

Anqi Pang,

Sibei Yang,

Jingyi Yu,

Kan Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Yingdong and Li, Changming and Wang, Yifan and Zhao, Yongxiang and Pang, Anqi and Yang, Sibei and Yu, Jingyi and Ren, Kan},
  title     = {Dissecting and Mitigating Diffusion Bias via Mechanistic Interpretability},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8192--8202},
}
Graph Neural Network Combining Event Stream and Periodic Aggregation for Low-Latency Event-based Vision: Manon Dampfhoffer,

Thomas Mesquida,

Damien Joubert,

Thomas Dalgaty,

Pascal Vivet,

Christoph Posch; [pdf]
[bibtex]
@InProceedings{Dampfhoffer_2025_CVPR,
  author    = {Dampfhoffer, Manon and Mesquida, Thomas and Joubert, Damien and Dalgaty, Thomas and Vivet, Pascal and Posch, Christoph},
  title     = {Graph Neural Network Combining Event Stream and Periodic Aggregation for Low-Latency Event-based Vision},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6909--6918},
}
Be More Specific: Evaluating Object-centric Realism in Synthetic Images: Anqi Liang,

Ciprian Corneanu,

Qianli Feng,

Giorgio Giannone,

Aleix Martinez; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Anqi and Corneanu, Ciprian and Feng, Qianli and Giannone, Giorgio and Martinez, Aleix},
  title     = {Be More Specific: Evaluating Object-centric Realism in Synthetic Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28842--28851},
}
Correlative and Discriminative Label Grouping for Multi-Label Visual Prompt Tuning: Lei-Lei Ma,

Shuo Xu,

Ming-Kun Xie,

Lei Wang,

Dengdi Sun,

Haifeng Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Lei-Lei and Xu, Shuo and Xie, Ming-Kun and Wang, Lei and Sun, Dengdi and Zhao, Haifeng},
  title     = {Correlative and Discriminative Label Grouping for Multi-Label Visual Prompt Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25434--25443},
}
LoRACLR: Contrastive Adaptation for Customization of Diffusion Models: Enis Simsar,

Thomas Hofmann,

Federico Tombari,

Pinar Yanardag; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Simsar_2025_CVPR,
  author    = {Simsar, Enis and Hofmann, Thomas and Tombari, Federico and Yanardag, Pinar},
  title     = {{LoRACLR}: Contrastive Adaptation for Customization of Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13189--13198},
}
ArtFormer: Controllable Generation of Diverse 3D Articulated Objects: Jiayi Su,

Youhe Feng,

Zheng Li,

Jinhua Song,

Yangfan He,

Botao Ren,

Botian Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2025_CVPR,
  author    = {Su, Jiayi and Feng, Youhe and Li, Zheng and Song, Jinhua and He, Yangfan and Ren, Botao and Xu, Botian},
  title     = {{ArtFormer}: Controllable Generation of Diverse {3D} Articulated Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1894--1904},
}
Opportunistic Single-Photon Time of Flight: Sotiris Nousias,

Mian Wei,

Howard Xiao,

Maxx Wu,

Shahmeer Athar,

Kevin J. Wang,

Anagh Malik,

David A. Barmherzig,

David B. Lindell,

Kyros N. Kutulakos; [pdf] [supp]
[bibtex]
@InProceedings{Nousias_2025_CVPR,
  author    = {Nousias, Sotiris and Wei, Mian and Xiao, Howard and Wu, Maxx and Athar, Shahmeer and Wang, Kevin J. and Malik, Anagh and Barmherzig, David A. and Lindell, David B. and Kutulakos, Kyros N.},
  title     = {Opportunistic Single-Photon Time of Flight},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15852--15862},
}
Bridging Gait Recognition and Large Language Models Sequence Modeling: Shaopeng Yang,

Jilong Wang,

Saihui Hou,

Xu Liu,

Chunshui Cao,

Liang Wang,

Yongzhen Huang; [pdf]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Shaopeng and Wang, Jilong and Hou, Saihui and Liu, Xu and Cao, Chunshui and Wang, Liang and Huang, Yongzhen},
  title     = {Bridging Gait Recognition and Large Language Models Sequence Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3460--3469},
}
Argus: Vision-Centric Reasoning with Grounded Chain-of-Thought: Yunze Man,

De-An Huang,

Guilin Liu,

Shiwei Sheng,

Shilong Liu,

Liang-Yan Gui,

Jan Kautz,

Yu-Xiong Wang,

Zhiding Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Man_2025_CVPR,
  author    = {Man, Yunze and Huang, De-An and Liu, Guilin and Sheng, Shiwei and Liu, Shilong and Gui, Liang-Yan and Kautz, Jan and Wang, Yu-Xiong and Yu, Zhiding},
  title     = {Argus: Vision-Centric Reasoning with Grounded Chain-of-Thought},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14268--14280},
}
Bootstrap Your Own Views: Masked Ego-Exo Modeling for Fine-grained View-invariant Video Representations: Jungin Park,

Jiyoung Lee,

Kwanghoon Sohn; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Jungin and Lee, Jiyoung and Sohn, Kwanghoon},
  title     = {Bootstrap Your Own Views: Masked Ego-Exo Modeling for Fine-grained View-invariant Video Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13661--13670},
}
SFDM: Robust Decomposition of Geometry and Reflectance for Realistic Face Rendering from Sparse-view Images: Daisheng Jin,

Jiangbei Hu,

Baixin Xu,

Yuxin Dai,

Chen Qian,

Ying He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2025_CVPR,
  author    = {Jin, Daisheng and Hu, Jiangbei and Xu, Baixin and Dai, Yuxin and Qian, Chen and He, Ying},
  title     = {{SFDM}: Robust Decomposition of Geometry and Reflectance for Realistic Face Rendering from Sparse-view Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26409--26419},
}
DiSRT-In-Bed: Diffusion-Based Sim-to-Real Transfer Framework for In-Bed Human Mesh Recovery: Jing Gao,

Ce Zheng,

Laszlo A. Jeni,

Zackory Erickson; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Jing and Zheng, Ce and Jeni, Laszlo A. and Erickson, Zackory},
  title     = {{DiSRT-In-Bed}: Diffusion-Based Sim-to-Real Transfer Framework for In-Bed Human Mesh Recovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1829--1838},
}
Ouroboros3D: Image-to-3D Generation via 3D-aware Recursive Diffusion: Hao Wen,

Zehuan Huang,

Yaohui Wang,

Xinyuan Chen,

Lu Sheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2025_CVPR,
  author    = {Wen, Hao and Huang, Zehuan and Wang, Yaohui and Chen, Xinyuan and Sheng, Lu},
  title     = {{Ouroboros3D}: {Image-to-3D} Generation via {3D-aware} Recursive Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21631--21641},
}
QMambaBSR: Burst Image Super-Resolution with Query State Space Model: Xin Di,

Long Peng,

Peizhe Xia,

Wenbo Li,

Renjing Pei,

Yang Cao,

Yang Wang,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Di_2025_CVPR,
  author    = {Di, Xin and Peng, Long and Xia, Peizhe and Li, Wenbo and Pei, Renjing and Cao, Yang and Wang, Yang and Zha, Zheng-Jun},
  title     = {{QMambaBSR}: Burst Image Super-Resolution with Query State Space Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23080--23090},
}
Encapsulated Composition of Text-to-Image and Text-to-Video Models for High-Quality Video Synthesis: Tongtong Su,

Chengyu Wang,

Bingyan Liu,

Jun Huang,

Dongming Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2025_CVPR,
  author    = {Su, Tongtong and Wang, Chengyu and Liu, Bingyan and Huang, Jun and Lu, Dongming},
  title     = {Encapsulated Composition of Text-to-Image and Text-to-Video Models for High-Quality Video Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18209--18218},
}
Multi-Group Proportional Representations for Text-to-Image Models: Sangwon Jung,

Alex Oesterling,

Claudio Mayrink Verdun,

Sajani Vithana,

Taesup Moon,

Flavio P. Calmon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2025_CVPR,
  author    = {Jung, Sangwon and Oesterling, Alex and Verdun, Claudio Mayrink and Vithana, Sajani and Moon, Taesup and Calmon, Flavio P.},
  title     = {Multi-Group Proportional Representations for Text-to-Image Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23744--23754},
}
Towards Generalizable Trajectory Prediction using Dual-Level Representation Learning and Adaptive Prompting: Kaouther Messaoud,

Matthieu Cord,

Alexandre Alahi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Messaoud_2025_CVPR,
  author    = {Messaoud, Kaouther and Cord, Matthieu and Alahi, Alexandre},
  title     = {Towards Generalizable Trajectory Prediction using Dual-Level Representation Learning and Adaptive Prompting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27564--27574},
}
CoMatcher: Multi-View Collaborative Feature Matching: Jintao Zhang,

Zimin Xia,

Mingyue Dong,

Shuhan Shen,

Linwei Yue,

Xianwei Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jintao and Xia, Zimin and Dong, Mingyue and Shen, Shuhan and Yue, Linwei and Zheng, Xianwei},
  title     = {{CoMatcher}: Multi-View Collaborative Feature Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21970--21980},
}
COUNTS: Benchmarking Object Detectors and Multimodal Large Language Models under Distribution Shifts: Jiansheng Li,

Xingxuan Zhang,

Hao Zou,

Yige Guo,

Renzhe Xu,

Yilong Liu,

Chuzhao Zhu,

Yue He,

Peng Cui; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jiansheng and Zhang, Xingxuan and Zou, Hao and Guo, Yige and Xu, Renzhe and Liu, Yilong and Zhu, Chuzhao and He, Yue and Cui, Peng},
  title     = {{COUNTS}: Benchmarking Object Detectors and Multimodal Large Language Models under Distribution Shifts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9186--9198},
}
Retrieving Semantics from the Deep: an RAG Solution for Gesture Synthesis: M. Hamza Mughal,

Rishabh Dabral,

Merel C.J. Scholman,

Vera Demberg,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mughal_2025_CVPR,
  author    = {Mughal, M. Hamza and Dabral, Rishabh and Scholman, Merel C. J. and Demberg, Vera and Theobalt, Christian},
  title     = {Retrieving Semantics from the Deep: an {RAG} Solution for Gesture Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16578--16588},
}
HOT: Hadamard-based Optimized Training: Seonggon Kim,

Juncheol Shin,

Seung-taek Woo,

Eunhyeok Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Seonggon and Shin, Juncheol and Woo, Seung-taek and Park, Eunhyeok},
  title     = {{HOT}: {Hadamard-based} Optimized Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4787--4796},
}
Towards a Universal Synthetic Video Detector: From Face or Background Manipulations to Fully AI-Generated Content: Rohit Kundu,

Hao Xiong,

Vishal Mohanty,

Athula Balachandran,

Amit K. Roy-Chowdhury; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kundu_2025_CVPR,
  author    = {Kundu, Rohit and Xiong, Hao and Mohanty, Vishal and Balachandran, Athula and Roy-Chowdhury, Amit K.},
  title     = {Towards a Universal Synthetic Video Detector: From Face or Background Manipulations to Fully {AI-Generated} Content},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28050--28060},
}
TokenFlow: Unified Image Tokenizer for Multimodal Understanding and Generation: Liao Qu,

Huichao Zhang,

Yiheng Liu,

Xu Wang,

Yi Jiang,

Yiming Gao,

Hu Ye,

Daniel K. Du,

Zehuan Yuan,

Xinglong Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_CVPR,
  author    = {Qu, Liao and Zhang, Huichao and Liu, Yiheng and Wang, Xu and Jiang, Yi and Gao, Yiming and Ye, Hu and Du, Daniel K. and Yuan, Zehuan and Wu, Xinglong},
  title     = {{TokenFlow}: Unified Image Tokenizer for Multimodal Understanding and Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2545--2555},
}
Improving Personalized Search with Regularized Low-Rank Parameter Updates: Fiona Ryan,

Josef Sivic,

Fabian Caba Heilbron,

Judy Hoffman,

James M. Rehg,

Bryan Russell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ryan_2025_CVPR,
  author    = {Ryan, Fiona and Sivic, Josef and Heilbron, Fabian Caba and Hoffman, Judy and Rehg, James M. and Russell, Bryan},
  title     = {Improving Personalized Search with Regularized Low-Rank Parameter Updates},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19748--19757},
}
A Focused Human Body Model for Accurate Anthropometric Measurements Extraction: Shuhang Chen,

Xianliang Huang,

Zhizhou Zhong,

Juhong Guan,

Shuigeng Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Shuhang and Huang, Xianliang and Zhong, Zhizhou and Guan, Juhong and Zhou, Shuigeng},
  title     = {A Focused Human Body Model for Accurate Anthropometric Measurements Extraction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22658--22667},
}
SnapGen-V: Generating a Five-Second Video within Five Seconds on a Mobile Device: Yushu Wu,

Zhixing Zhang,

Yanyu Li,

Yanwu Xu,

Anil Kag,

Yang Sui,

Huseyin Coskun,

Ke Ma,

Aleksei Lebedev,

Ju Hu,

Dimitris N. Metaxas,

Yanzhi Wang,

Sergey Tulyakov,

Jian Ren; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Yushu and Zhang, Zhixing and Li, Yanyu and Xu, Yanwu and Kag, Anil and Sui, Yang and Coskun, Huseyin and Ma, Ke and Lebedev, Aleksei and Hu, Ju and Metaxas, Dimitris N. and Wang, Yanzhi and Tulyakov, Sergey and Ren, Jian},
  title     = {{SnapGen-V}: Generating a Five-Second Video within Five Seconds on a Mobile Device},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2479--2490},
}
Adapting Dense Matching for Homography Estimation with Grid-based Acceleration: Kaining Zhang,

Yuxin Deng,

Jiayi Ma,

Paolo Favaro; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Kaining and Deng, Yuxin and Ma, Jiayi and Favaro, Paolo},
  title     = {Adapting Dense Matching for Homography Estimation with Grid-based Acceleration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6294--6303},
}
HyperLoRA: Parameter-Efficient Adaptive Generation for Portrait Synthesis: Mengtian Li,

Jinshu Chen,

Wanquan Feng,

Bingchuan Li,

Fei Dai,

Songtao Zhao,

Qian He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Mengtian and Chen, Jinshu and Feng, Wanquan and Li, Bingchuan and Dai, Fei and Zhao, Songtao and He, Qian},
  title     = {{HyperLoRA}: Parameter-Efficient Adaptive Generation for Portrait Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13114--13123},
}
ACE: Anti-Editing Concept Erasure in Text-to-Image Models: Zihao Wang,

Yuxiang Wei,

Fan Li,

Renjing Pei,

Hang Xu,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zihao and Wei, Yuxiang and Li, Fan and Pei, Renjing and Xu, Hang and Zuo, Wangmeng},
  title     = {{ACE}: Anti-Editing Concept Erasure in Text-to-Image Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23505--23515},
}
EchoMatch: Partial-to-Partial Shape Matching via Correspondence Reflection: Yizheng Xie,

Viktoria Ehm,

Paul Roetzer,

Nafie El Amrani,

Maolin Gao,

Florian Bernard,

Daniel Cremers; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Yizheng and Ehm, Viktoria and Roetzer, Paul and El Amrani, Nafie and Gao, Maolin and Bernard, Florian and Cremers, Daniel},
  title     = {{EchoMatch}: Partial-to-Partial Shape Matching via Correspondence Reflection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11665--11675},
}
CoSDH: Communication-Efficient Collaborative Perception via Supply-Demand Awareness and Intermediate-Late Hybridization: Junhao Xu,

Yanan Zhang,

Zhi Cai,

Di Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Junhao and Zhang, Yanan and Cai, Zhi and Huang, Di},
  title     = {{CoSDH}: Communication-Efficient Collaborative Perception via Supply-Demand Awareness and Intermediate-Late Hybridization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6834--6843},
}
Stereo Anywhere: Robust Zero-Shot Deep Stereo Matching Even Where Either Stereo or Mono Fail: Luca Bartolomei,

Fabio Tosi,

Matteo Poggi,

Stefano Mattoccia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bartolomei_2025_CVPR,
  author    = {Bartolomei, Luca and Tosi, Fabio and Poggi, Matteo and Mattoccia, Stefano},
  title     = {{Stereo Anywhere}: Robust Zero-Shot Deep Stereo Matching Even Where Either Stereo or Mono Fail},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1013--1027},
}
Order-Robust Class Incremental Learning: Graph-Driven Dynamic Similarity Grouping: Guannan Lai,

Yujie Li,

Xiangkun Wang,

Junbo Zhang,

Tianrui Li,

Xin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Guannan and Li, Yujie and Wang, Xiangkun and Zhang, Junbo and Li, Tianrui and Yang, Xin},
  title     = {Order-Robust Class Incremental Learning: Graph-Driven Dynamic Similarity Grouping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4894--4904},
}
Where the Devil Hides: Deepfake Detectors Can No Longer Be Trusted: Shuaiwei Yuan,

Junyu Dong,

Yuezun Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Shuaiwei and Dong, Junyu and Li, Yuezun},
  title     = {Where the Devil Hides: Deepfake Detectors Can No Longer Be Trusted},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8764--8774},
}
Synthetic-to-Real Self-supervised Robust Depth Estimation via Learning with Motion and Structure Priors: Weilong Yan,

Ming Li,

Haipeng Li,

Shuwei Shao,

Robby T. Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Weilong and Li, Ming and Li, Haipeng and Shao, Shuwei and Tan, Robby T.},
  title     = {Synthetic-to-Real Self-supervised Robust Depth Estimation via Learning with Motion and Structure Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21880--21890},
}
CaMuViD: Calibration-Free Multi-View Detection: Amir Etefaghi Daryani,

M. Usman Maqbool Bhutta,

Byron Hernandez,

Henry Medeiros; [pdf]
[bibtex]
@InProceedings{Daryani_2025_CVPR,
  author    = {Daryani, Amir Etefaghi and Bhutta, M. Usman Maqbool and Hernandez, Byron and Medeiros, Henry},
  title     = {{CaMuViD}: Calibration-Free Multi-View Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1220--1229},
}
Prosody-Enhanced Acoustic Pre-training and Acoustic-Disentangled Prosody Adapting for Movie Dubbing: Zhedong Zhang,

Liang Li,

Chenggang Yan,

Chunshan Liu,

Anton van den Hengel,

Yuankai Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhedong and Li, Liang and Yan, Chenggang and Liu, Chunshan and van den Hengel, Anton and Qi, Yuankai},
  title     = {Prosody-Enhanced Acoustic Pre-training and Acoustic-Disentangled Prosody Adapting for Movie Dubbing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {172--182},
}
Hierarchical Knowledge Prompt Tuning for Multi-task Test-Time Adaptation: Qiang Zhang,

Mengsheng Zhao,

Jiawei Liu,

Fanrui Zhang,

Yongchao Xu,

Zheng-Jun Zha; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Qiang and Zhao, Mengsheng and Liu, Jiawei and Zhang, Fanrui and Xu, Yongchao and Zha, Zheng-Jun},
  title     = {Hierarchical Knowledge Prompt Tuning for Multi-task Test-Time Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30524--30533},
}
Cross-Modal Interactive Perception Network with Mamba for Lung Tumor Segmentation in PET-CT Images: Jie Mei,

Chenyu Lin,

Yu Qiu,

Yaonan Wang,

Hui Zhang,

Ziyang Wang,

Dong Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mei_2025_CVPR,
  author    = {Mei, Jie and Lin, Chenyu and Qiu, Yu and Wang, Yaonan and Zhang, Hui and Wang, Ziyang and Dai, Dong},
  title     = {Cross-Modal Interactive Perception Network with {Mamba} for Lung Tumor Segmentation in {PET-CT} Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15653--15662},
}
LaTexBlend: Scaling Multi-concept Customized Generation with Latent Textual Blending: Jian Jin,

Zhenbo Yu,

Yang Shen,

Zhenyong Fu,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2025_CVPR,
  author    = {Jin, Jian and Yu, Zhenbo and Shen, Yang and Fu, Zhenyong and Yang, Jian},
  title     = {{LaTexBlend}: Scaling Multi-concept Customized Generation with Latent Textual Blending},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23585--23594},
}
DejaVid: Encoder-Agnostic Learned Temporal Matching for Video Classification: Darryl Ho,

Samuel Madden; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ho_2025_CVPR,
  author    = {Ho, Darryl and Madden, Samuel},
  title     = {{DejaVid}: Encoder-Agnostic Learned Temporal Matching for Video Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24023--24032},
}
HVI: A New Color Space for Low-light Image Enhancement: Qingsen Yan,

Yixu Feng,

Cheng Zhang,

Guansong Pang,

Kangbiao Shi,

Peng Wu,

Wei Dong,

Jinqiu Sun,

Yanning Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Qingsen and Feng, Yixu and Zhang, Cheng and Pang, Guansong and Shi, Kangbiao and Wu, Peng and Dong, Wei and Sun, Jinqiu and Zhang, Yanning},
  title     = {{HVI}: A New Color Space for Low-light Image Enhancement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5678--5687},
}
DualPM: Dual Posed-Canonical Point Maps for 3D Shape and Pose Reconstruction: Ben Kaye,

Tomas Jakab,

Shangzhe Wu,

Christian Ruprecht,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kaye_2025_CVPR,
  author    = {Kaye, Ben and Jakab, Tomas and Wu, Shangzhe and Ruprecht, Christian and Vedaldi, Andrea},
  title     = {{DualPM}: Dual Posed-Canonical Point Maps for {3D} Shape and Pose Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6425--6435},
}
SuperPC: A Single Diffusion Model for Point Cloud Completion, Upsampling, Denoising, and Colorization: Yi Du,

Zhipeng Zhao,

Shaoshu Su,

Sharath Golluri,

Haoze Zheng,

Runmao Yao,

Chen Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2025_CVPR,
  author    = {Du, Yi and Zhao, Zhipeng and Su, Shaoshu and Golluri, Sharath and Zheng, Haoze and Yao, Runmao and Wang, Chen},
  title     = {{SuperPC}: A Single Diffusion Model for Point Cloud Completion, Upsampling, Denoising, and Colorization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16953--16964},
}
One Diffusion to Generate Them All: Duong H. Le,

Tuan Pham,

Sangho Lee,

Christopher Clark,

Aniruddha Kembhavi,

Stephan Mandt,

Ranjay Krishna,

Jiasen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Le_2025_CVPR,
    author    = {Le, Duong H. and Pham, Tuan and Lee, Sangho and Clark, Christopher and Kembhavi, Aniruddha and Mandt, Stephan and Krishna, Ranjay and Lu, Jiasen},
    title     = {One Diffusion to Generate Them All},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2671--2682},
}
Let's Verify and Reinforce Image Generation Step by Step: Renrui Zhang,

Chengzhuo Tong,

Zhizheng Zhao,

Ziyu Guo,

Haoquan Zhang,

Manyuan Zhang,

Jiaming Liu,

Peng Gao,

Hongsheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Renrui and Tong, Chengzhuo and Zhao, Zhizheng and Guo, Ziyu and Zhang, Haoquan and Zhang, Manyuan and Liu, Jiaming and Gao, Peng and Li, Hongsheng},
    title     = {Let's Verify and Reinforce Image Generation Step by Step},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {28662--28672},
}
All-Optical Nonlinear Diffractive Deep Network for Ultrafast Image Denoising: Xiaoling Zhou,

Zhemg Lee,

Wei Ye,

Rui Xie,

Wenbo Zhang,

Guanju Peng,

Zongze Li,

Shikun Zhang; [pdf]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
    author    = {Zhou, Xiaoling and Lee, Zhemg and Ye, Wei and Xie, Rui and Zhang, Wenbo and Peng, Guanju and Li, Zongze and Zhang, Shikun},
    title     = {All-Optical Nonlinear Diffractive Deep Network for Ultrafast Image Denoising},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {28221--28231},
}
Maintaining Consistent Inter-Class Topology in Continual Test-Time Adaptation: Chenggong Ni,

Fan Lyu,

Jiayao Tan,

Fuyuan Hu,

Rui Yao,

Tao Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Ni_2025_CVPR,
    author    = {Ni, Chenggong and Lyu, Fan and Tan, Jiayao and Hu, Fuyuan and Yao, Rui and Zhou, Tao},
    title     = {Maintaining Consistent Inter-Class Topology in Continual Test-Time Adaptation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {15319--15328},
}
UNOPose: Unseen Object Pose Estimation with an Unposed RGB-D Reference Image: Xingyu Liu,

Gu Wang,

Ruida Zhang,

Chenyangguang Zhang,

Federico Tombari,

Xiangyang Ji; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Xingyu and Wang, Gu and Zhang, Ruida and Zhang, Chenyangguang and Tombari, Federico and Ji, Xiangyang},
    title     = {{UNOPose}: Unseen Object Pose Estimation with an Unposed {RGB-D} Reference Image},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {22023--22034},
}
CoSER: Towards Consistent Dense Multiview Text-to-Image Generator for 3D Creation: Bonan Li,

Zicheng Zhang,

Xingyi Yang,

Xinchao Wang; [pdf]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Bonan and Zhang, Zicheng and Yang, Xingyi and Wang, Xinchao},
    title     = {{CoSER}: Towards Consistent Dense Multiview Text-to-Image Generator for {3D} Creation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2880--2890},
}
HybridMQA: Exploring Geometry-Texture Interactions for Colored Mesh Quality Assessment: Armin Shafiee Sarvestani,

Sheyang Tang,

Zhou Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sarvestani_2025_CVPR,
    author    = {Sarvestani, Armin Shafiee and Tang, Sheyang and Wang, Zhou},
    title     = {{HybridMQA}: Exploring Geometry-Texture Interactions for Colored Mesh Quality Assessment},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {21414--21424},
}
Generalized Gaussian Entropy Model for Point Cloud Attribute Compression with Dynamic Likelihood Intervals: Changhao Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_CVPR,
    author    = {Peng, Changhao},
    title     = {Generalized {Gaussian} Entropy Model for Point Cloud Attribute Compression with Dynamic Likelihood Intervals},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {11779--11788},
}
Self-Learning Hyperspectral and Multispectral Image Fusion via Adaptive Residual Guided Subspace Diffusion Model: Jian Zhu,

He Wang,

Yang Xu,

Zebin Wu,

Zhihui Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
    author    = {Zhu, Jian and Wang, He and Xu, Yang and Wu, Zebin and Wei, Zhihui},
    title     = {Self-Learning Hyperspectral and Multispectral Image Fusion via Adaptive Residual Guided Subspace Diffusion Model},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {17862--17871},
}
SIR-DIFF: Sparse Image Sets Restoration with Multi-View Diffusion Model: Yucheng Mao,

Boyang Wang,

Nilesh Kulkarni,

Jeong Joon Park; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2025_CVPR,
    author    = {Mao, Yucheng and Wang, Boyang and Kulkarni, Nilesh and Park, Jeong Joon},
    title     = {{SIR-DIFF}: Sparse Image Sets Restoration with Multi-View Diffusion Model},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {21620--21630},
}
StickMotion: Generating 3D Human Motions by Drawing a Stickman: Tao Wang,

Zhihua Wu,

Qiaozhi He,

Jiaming Chu,

Ling Qian,

Yu Cheng,

Junliang Xing,

Jian Zhao,

Lei Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Tao and Wu, Zhihua and He, Qiaozhi and Chu, Jiaming and Qian, Ling and Cheng, Yu and Xing, Junliang and Zhao, Jian and Jin, Lei},
    title     = {{StickMotion}: Generating {3D} Human Motions by Drawing a Stickman},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {12370--12379},
}
Reversible Decoupling Network for Single Image Reflection Removal: Hao Zhao,

Mingjia Li,

Qiming Hu,

Xiaojie Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
    author    = {Zhao, Hao and Li, Mingjia and Hu, Qiming and Guo, Xiaojie},
    title     = {Reversible Decoupling Network for Single Image Reflection Removal},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {26430--26439},
}
Hierarchical Features Matter: A Deep Exploration of Progressive Parameterization Method for Dataset Distillation: Xinhao Zhong,

Hao Fang,

Bin Chen,

Xulin Gu,

Meikang Qiu,

Shuhan Qi,

Shu-Tao Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_CVPR,
    author    = {Zhong, Xinhao and Fang, Hao and Chen, Bin and Gu, Xulin and Qiu, Meikang and Qi, Shuhan and Xia, Shu-Tao},
    title     = {Hierarchical Features Matter: A Deep Exploration of Progressive Parameterization Method for Dataset Distillation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {30462--30471},
}
Enduring, Efficient and Robust Trajectory Prediction Attack in Autonomous Driving via Optimization-Driven Multi-Frame Perturbation Framework: Yi Yu,

Weizhen Han,

Libing Wu,

Bingyi Liu,

Enshu Wang,

Zhuangzhuang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
    author    = {Yu, Yi and Han, Weizhen and Wu, Libing and Liu, Bingyi and Wang, Enshu and Zhang, Zhuangzhuang},
    title     = {Enduring, Efficient and Robust Trajectory Prediction Attack in Autonomous Driving via Optimization-Driven Multi-Frame Perturbation Framework},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {17229--17238},
}
GLASS: Guided Latent Slot Diffusion for Object-Centric Learning: Krishnakant Singh,

Simone Schaub-Meyer,

Stefan Roth; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singh_2025_CVPR,
    author    = {Singh, Krishnakant and Schaub-Meyer, Simone and Roth, Stefan},
    title     = {{GLASS}: Guided Latent Slot Diffusion for Object-Centric Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {28673--28683},
}
UNEM: UNrolled Generalized EM for Transductive Few-Shot Learning: Long Zhou,

Fereshteh Shakeri,

Aymen Sadraoui,

Mounir Kaaniche,

Jean-Christophe Pesquet,

Ismail Ben Ayed; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
    author    = {Zhou, Long and Shakeri, Fereshteh and Sadraoui, Aymen and Kaaniche, Mounir and Pesquet, Jean-Christophe and Ben Ayed, Ismail},
    title     = {{UNEM}: {UNrolled} Generalized {EM} for Transductive Few-Shot Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9665--9675},
}
SASep: Saliency-Aware Structured Separation of Geometry and Feature for Open Set Learning on Point Clouds: Jinfeng Xu,

Xianzhi Li,

Yuan Tang,

Xu Han,

Qiao Yu,

Yixue Hao,

Long Hu,

Min Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
    author    = {Xu, Jinfeng and Li, Xianzhi and Tang, Yuan and Han, Xu and Yu, Qiao and Hao, Yixue and Hu, Long and Chen, Min},
    title     = {{SASep}: Saliency-Aware Structured Separation of Geometry and Feature for Open Set Learning on Point Clouds},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {27295--27304},
}
Low-Biased General Annotated Dataset Generation: Dengyang Jiang,

Haoyu Wang,

Lei Zhang,

Wei Wei,

Guang Dai,

Mengmeng Wang,

Jingdong Wang,

Yanning Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
    author    = {Jiang, Dengyang and Wang, Haoyu and Zhang, Lei and Wei, Wei and Dai, Guang and Wang, Mengmeng and Wang, Jingdong and Zhang, Yanning},
    title     = {Low-Biased General Annotated Dataset Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {25113--25123},
}
G3Flow: Generative 3D Semantic Flow for Pose-aware and Generalizable Object Manipulation: Tianxing Chen,

Yao Mu,

Zhixuan Liang,

Zanxin Chen,

Shijia Peng,

Qiangyu Chen,

Mingkun Xu,

Ruizhen Hu,

Hongyuan Zhang,

Xuelong Li,

Ping Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Tianxing and Mu, Yao and Liang, Zhixuan and Chen, Zanxin and Peng, Shijia and Chen, Qiangyu and Xu, Mingkun and Hu, Ruizhen and Zhang, Hongyuan and Li, Xuelong and Luo, Ping},
    title     = {{G3Flow}: Generative {3D} Semantic Flow for Pose-aware and Generalizable Object Manipulation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1735--1744},
}
Generative Hard Example Augmentation for Semantic Point Cloud Segmentation: Qi Zhang,

Jibin Peng,

Zhao Huang,

Wei Feng,

Di Lin; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Qi and Peng, Jibin and Huang, Zhao and Feng, Wei and Lin, Di},
    title     = {Generative Hard Example Augmentation for Semantic Point Cloud Segmentation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {22205--22214},
}
Toward Generalized Image Quality Assessment: Relaxing the Perfect Reference Quality Assumption: Du Chen,

Tianhe Wu,

Kede Ma,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Du and Wu, Tianhe and Ma, Kede and Zhang, Lei},
    title     = {Toward Generalized Image Quality Assessment: Relaxing the Perfect Reference Quality Assumption},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {12742--12752},
}
Explaining Domain Shifts in Language: Concept Erasing for Interpretable Image Classification: Zequn Zeng,

Yudi Su,

Jianqiao Sun,

Tiansheng Wen,

Hao Zhang,

Zhengjue Wang,

Bo Chen,

Hongwei Liu,

Jiawei Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_CVPR,
    author    = {Zeng, Zequn and Su, Yudi and Sun, Jianqiao and Wen, Tiansheng and Zhang, Hao and Wang, Zhengjue and Chen, Bo and Liu, Hongwei and Ma, Jiawei},
    title     = {Explaining Domain Shifts in Language: Concept Erasing for Interpretable Image Classification},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9517--9526},
}
Hazy Low-Quality Satellite Video Restoration Via Learning Optimal Joint Degradation Patterns and Continuous-Scale Super-Resolution Reconstruction: Ning Ni,

Libao Zhang; [pdf]
[bibtex]
@InProceedings{Ni_2025_CVPR,
    author    = {Ni, Ning and Zhang, Libao},
    title     = {Hazy Low-Quality Satellite Video Restoration Via Learning Optimal Joint Degradation Patterns and Continuous-Scale Super-Resolution Reconstruction},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {12690--12699},
}
Textured Gaussians for Enhanced 3D Scene Appearance Modeling: Brian Chao,

Hung-Yu Tseng,

Lorenzo Porzi,

Chen Gao,

Tuotuo Li,

Qinbo Li,

Ayush Saraf,

Jia-Bin Huang,

Johannes Kopf,

Gordon Wetzstein,

Changil Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chao_2025_CVPR,
    author    = {Chao, Brian and Tseng, Hung-Yu and Porzi, Lorenzo and Gao, Chen and Li, Tuotuo and Li, Qinbo and Saraf, Ayush and Huang, Jia-Bin and Kopf, Johannes and Wetzstein, Gordon and Kim, Changil},
    title     = {Textured {Gaussians} for Enhanced {3D} Scene Appearance Modeling},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8964--8974},
}
NeighborRetr: Balancing Hub Centrality in Cross-Modal Retrieval: Zengrong Lin,

Zheng Wang,

Tianwen Qian,

Pan Mu,

Sixian Chan,

Cong Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
    author    = {Lin, Zengrong and Wang, Zheng and Qian, Tianwen and Mu, Pan and Chan, Sixian and Bai, Cong},
    title     = {{NeighborRetr}: Balancing Hub Centrality in Cross-Modal Retrieval},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9263--9273},
}
ETAP: Event-based Tracking of Any Point: Friedhelm Hamann,

Daniel Gehrig,

Filbert Febryanto,

Kostas Daniilidis,

Guillermo Gallego; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hamann_2025_CVPR,
    author    = {Hamann, Friedhelm and Gehrig, Daniel and Febryanto, Filbert and Daniilidis, Kostas and Gallego, Guillermo},
    title     = {{ETAP}: Event-based Tracking of Any Point},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {27186--27196},
}
Beyond Sight: Towards Cognitive Alignment in LVLM via Enriched Visual Knowledge: Yaqi Zhao,

Yuanyang Yin,

Lin Li,

Mingan Lin,

Victor Shea-Jay Huang,

Siwei Chen,

Weipeng Chen,

Baoqun Yin,

Zenan Zhou,

Wentao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
    author    = {Zhao, Yaqi and Yin, Yuanyang and Li, Lin and Lin, Mingan and Huang, Victor Shea-Jay and Chen, Siwei and Chen, Weipeng and Yin, Baoqun and Zhou, Zenan and Zhang, Wentao},
    title     = {Beyond Sight: Towards Cognitive Alignment in {LVLM} via Enriched Visual Knowledge},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {24950--24959},
}
Global-Local Tree Search in VLMs for 3D Indoor Scene Generation: Wei Deng,

Mengshi Qi,

Huadong Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Deng_2025_CVPR,
    author    = {Deng, Wei and Qi, Mengshi and Ma, Huadong},
    title     = {Global-Local Tree Search in {VLMs} for {3D} Indoor Scene Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8975--8984},
}
Volumetric Surfaces: Representing Fuzzy Geometries with Layered Meshes: Stefano Esposito,

Anpei Chen,

Christian Reiser,

Samuel Rota Bulò,

Lorenzo Porzi,

Katja Schwarz,

Christian Richardt,

Michael Zollhöfer,

Peter Kontschieder,

Andreas Geiger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Esposito_2025_CVPR,
    author    = {Esposito, Stefano and Chen, Anpei and Reiser, Christian and Bul{\`o}, Samuel Rota and Porzi, Lorenzo and Schwarz, Katja and Richardt, Christian and Zollh{\"o}fer, Michael and Kontschieder, Peter and Geiger, Andreas},
    title     = {Volumetric Surfaces: Representing Fuzzy Geometries with Layered Meshes},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {21370--21380},
}
Overcoming Shortcut Problem in VLM for Robust Out-of-Distribution Detection: Zhuo Xu,

Xiang Xiang,

Yifan Liang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
    author    = {Xu, Zhuo and Xiang, Xiang and Liang, Yifan},
    title     = {Overcoming Shortcut Problem in {VLM} for Robust Out-of-Distribution Detection},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {15402--15412},
}
GFlowVLM: Enhancing Multi-step Reasoning in Vision-Language Models with Generative Flow Networks: Haoqiang Kang,

Enna Sachdeva,

Piyush Gupta,

Sangjae Bae,

Kwonjoon Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_CVPR,
    author    = {Kang, Haoqiang and Sachdeva, Enna and Gupta, Piyush and Bae, Sangjae and Lee, Kwonjoon},
    title     = {{GFlowVLM}: Enhancing Multi-step Reasoning in Vision-Language Models with Generative Flow Networks},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3815--3825},
}
STEPS: Sequential Probability Tensor Estimation for Text-to-Image Hard Prompt Search: Yuning Qiu,

Andong Wang,

Chao Li,

Haonan Huang,

Guoxu Zhou,

Qibin Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2025_CVPR,
    author    = {Qiu, Yuning and Wang, Andong and Li, Chao and Huang, Haonan and Zhou, Guoxu and Zhao, Qibin},
    title     = {{STEPS}: Sequential Probability Tensor Estimation for Text-to-Image Hard Prompt Search},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {28640--28650},
}
RoboSpatial: Teaching Spatial Understanding to 2D and 3D Vision-Language Models for Robotics: Chan Hee Song,

Valts Blukis,

Jonathan Tremblay,

Stephen Tyree,

Yu Su,

Stan Birchfield; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_CVPR,
    author    = {Song, Chan Hee and Blukis, Valts and Tremblay, Jonathan and Tyree, Stephen and Su, Yu and Birchfield, Stan},
    title     = {{RoboSpatial}: Teaching Spatial Understanding to {2D} and {3D} Vision-Language Models for Robotics},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {15768--15780},
}
VIRES: Video Instance Repainting via Sketch and Text Guided Generation: Shuchen Weng,

Haojie Zheng,

Peixuan Zhang,

Yuchen Hong,

Han Jiang,

Si Li,

Boxin Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Weng_2025_CVPR,
    author    = {Weng, Shuchen and Zheng, Haojie and Zhang, Peixuan and Hong, Yuchen and Jiang, Han and Li, Si and Shi, Boxin},
    title     = {{VIRES}: Video Instance Repainting via Sketch and Text Guided Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {28416--28425},
}
MAP: Unleashing Hybrid Mamba-Transformer Vision Backbone's Potential with Masked Autoregressive Pretraining: Yunze Liu,

Li Yi; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Yunze and Yi, Li},
    title     = {{MAP}: Unleashing Hybrid {Mamba}-Transformer Vision Backbone's Potential with Masked Autoregressive Pretraining},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9676--9685},
}
Segment Any-Quality Images with Generative Latent Space Enhancement: Guangqian Guo,

Yong Guo,

Xuehui Yu,

Wenbo Li,

Yaoxing Wang,

Shan Gao; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
    author    = {Guo, Guangqian and Guo, Yong and Yu, Xuehui and Li, Wenbo and Wang, Yaoxing and Gao, Shan},
    title     = {Segment Any-Quality Images with Generative Latent Space Enhancement},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2366--2376},
}
BG-Triangle: Bezier Gaussian Triangle for 3D Vectorization and Rendering: Minye Wu,

Haizhao Dai,

Kaixin Yao,

Tinne Tuytelaars,

Jingyi Yu; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
    author    = {Wu, Minye and Dai, Haizhao and Yao, Kaixin and Tuytelaars, Tinne and Yu, Jingyi},
    title     = {{BG-Triangle}: {Bezier} {Gaussian} Triangle for {3D} Vectorization and Rendering},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {16197--16207},
}
MIMO: Controllable Character Video Synthesis with Spatial Decomposed Modeling: Yifang Men,

Yuan Yao,

Miaomiao Cui,

Liefeng Bo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Men_2025_CVPR,
    author    = {Men, Yifang and Yao, Yuan and Cui, Miaomiao and Bo, Liefeng},
    title     = {{MIMO}: Controllable Character Video Synthesis with Spatial Decomposed Modeling},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {21181--21191},
}
TKG-DM: Training-free Chroma Key Content Generation Diffusion Model: Ryugo Morita,

Stanislav Frolov,

Brian Bernhard Moser,

Takahiro Shirakawa,

Ko Watanabe,

Andreas Dengel,

Jinjia Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Morita_2025_CVPR,
    author    = {Morita, Ryugo and Frolov, Stanislav and Moser, Brian Bernhard and Shirakawa, Takahiro and Watanabe, Ko and Dengel, Andreas and Zhou, Jinjia},
    title     = {{TKG-DM}: Training-free Chroma Key Content Generation Diffusion Model},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {13031--13040},
}
Lift3D Policy: Lifting 2D Foundation Models for Robust 3D Robotic Manipulation: Yueru Jia,

Jiaming Liu,

Sixiang Chen,

Chenyang Gu,

Zhilve Wang,

Longzan Luo,

Xiaoqi Li,

Pengwei Wang,

Zhongyuan Wang,

Renrui Zhang,

Shanghang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Jia_2025_CVPR,
    author    = {Jia, Yueru and Liu, Jiaming and Chen, Sixiang and Gu, Chenyang and Wang, Zhilve and Luo, Longzan and Li, Xiaoqi and Wang, Pengwei and Wang, Zhongyuan and Zhang, Renrui and Zhang, Shanghang},
    title     = {{Lift3D} Policy: Lifting {2D} Foundation Models for Robust {3D} Robotic Manipulation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {17347--17358},
}
Multi-View Pose-Agnostic Change Localization with Zero Labels: Chamuditha Jayanga Galappaththige,

Jason Lai,

Lloyd Windrim,

Donald Dansereau,

Niko Sunderhauf,

Dimity Miller; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Galappaththige_2025_CVPR,
    author    = {Galappaththige, Chamuditha Jayanga and Lai, Jason and Windrim, Lloyd and Dansereau, Donald and Sunderhauf, Niko and Miller, Dimity},
    title     = {Multi-View Pose-Agnostic Change Localization with Zero Labels},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {11600--11610},
}
From Sparse to Dense: Camera Relocalization with Scene-Specific Detector from Feature Gaussian Splatting: Zhiwei Huang,

Hailin Yu,

Yichun Shentu,

Jin Yuan,

Guofeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
    author    = {Huang, Zhiwei and Yu, Hailin and Shentu, Yichun and Yuan, Jin and Zhang, Guofeng},
    title     = {From Sparse to Dense: Camera Relocalization with Scene-Specific Detector from Feature {Gaussian} Splatting},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {27059--27069},
}
Accelerating Diffusion Transformer via Increment-Calibrated Caching with Channel-Aware Singular Value Decomposition: Zhiyuan Chen,

Keyi Li,

Yifan Jia,

Le Ye,

Yufei Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Zhiyuan and Li, Keyi and Jia, Yifan and Ye, Le and Ma, Yufei},
    title     = {Accelerating Diffusion Transformer via Increment-Calibrated Caching with Channel-Aware Singular Value Decomposition},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {18011--18020},
}
CityWalker: Learning Embodied Urban Navigation from Web-Scale Videos: Xinhao Liu,

Jintong Li,

Yicheng Jiang,

Niranjan Sujay,

Zhicheng Yang,

Juexiao Zhang,

John Abanes,

Jing Zhang,

Chen Feng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Xinhao and Li, Jintong and Jiang, Yicheng and Sujay, Niranjan and Yang, Zhicheng and Zhang, Juexiao and Abanes, John and Zhang, Jing and Feng, Chen},
    title     = {{CityWalker}: Learning Embodied Urban Navigation from Web-Scale Videos},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6875--6885},
}
A Simple yet Effective Layout Token in Large Language Models for Document Understanding: Zhaoqing Zhu,

Chuwei Luo,

Zirui Shao,

Feiyu Gao,

Hangdi Xing,

Qi Zheng,

Ji Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
    author    = {Zhu, Zhaoqing and Luo, Chuwei and Shao, Zirui and Gao, Feiyu and Xing, Hangdi and Zheng, Qi and Zhang, Ji},
    title     = {A Simple yet Effective Layout Token in Large Language Models for Document Understanding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {14472--14482},
}
Reconstruction vs. Generation: Taming Optimization Dilemma in Latent Diffusion Models: Jingfeng Yao,

Bin Yang,

Xinggang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2025_CVPR,
    author    = {Yao, Jingfeng and Yang, Bin and Wang, Xinggang},
    title     = {Reconstruction vs. Generation: Taming Optimization Dilemma in Latent Diffusion Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {15703--15712},
}
StableAnimator: High-Quality Identity-Preserving Human Image Animation: Shuyuan Tu,

Zhen Xing,

Xintong Han,

Zhi-Qi Cheng,

Qi Dai,

Chong Luo,

Zuxuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tu_2025_CVPR,
    author    = {Tu, Shuyuan and Xing, Zhen and Han, Xintong and Cheng, Zhi-Qi and Dai, Qi and Luo, Chong and Wu, Zuxuan},
    title     = {{StableAnimator}: High-Quality Identity-Preserving Human Image Animation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {21096--21106},
}
Learning Visual Composition through Improved Semantic Guidance: Austin Stone,

Hagen Soltau,

Robert Geirhos,

Xi Yi,

Ye Xia,

Bingyi Cao,

Kaifeng Chen,

Abhijit Ogale,

Jonathon Shlens; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Stone_2025_CVPR,
    author    = {Stone, Austin and Soltau, Hagen and Geirhos, Robert and Yi, Xi and Xia, Ye and Cao, Bingyi and Chen, Kaifeng and Ogale, Abhijit and Shlens, Jonathon},
    title     = {Learning Visual Composition through Improved Semantic Guidance},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3740--3750},
}
OODD: Test-time Out-of-Distribution Detection with Dynamic Dictionary: Yifeng Yang,

Lin Zhu,

Zewen Sun,

Hengyu Liu,

Qinying Gu,

Nanyang Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
    author    = {Yang, Yifeng and Zhu, Lin and Sun, Zewen and Liu, Hengyu and Gu, Qinying and Ye, Nanyang},
    title     = {{OODD}: Test-time Out-of-Distribution Detection with Dynamic Dictionary},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {30630--30639},
}
MEAT: Multiview Diffusion Model for Human Generation on Megapixels with Mesh Attention: Yuhan Wang,

Fangzhou Hong,

Shuai Yang,

Liming Jiang,

Wayne Wu,

Chen Change Loy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Yuhan and Hong, Fangzhou and Yang, Shuai and Jiang, Liming and Wu, Wayne and Loy, Chen Change},
    title     = {{MEAT}: Multiview Diffusion Model for Human Generation on Megapixels with Mesh Attention},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {11297--11306},
}
Free Lunch Enhancements for Multi-modal Crowd Counting: Haoliang Meng,

Xiaopeng Hong,

Zhengqin Lai,

Miao Shang; [pdf] [supp]
[bibtex]
@InProceedings{Meng_2025_CVPR,
    author    = {Meng, Haoliang and Hong, Xiaopeng and Lai, Zhengqin and Shang, Miao},
    title     = {Free Lunch Enhancements for Multi-modal Crowd Counting},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {14013--14023},
}
BIMBA: Selective-Scan Compression for Long-Range Video Question Answering: Md Mohaiminul Islam,

Tushar Nagarajan,

Huiyu Wang,

Gedas Bertasius,

Lorenzo Torresani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Islam_2025_CVPR,
    author    = {Islam, Md Mohaiminul and Nagarajan, Tushar and Wang, Huiyu and Bertasius, Gedas and Torresani, Lorenzo},
    title     = {{BIMBA}: Selective-Scan Compression for Long-Range Video Question Answering},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {29096--29107},
}
EVolSplat: Efficient Volume-based Gaussian Splatting for Urban View Synthesis: Sheng Miao,

Jiaxin Huang,

Dongfeng Bai,

Xu Yan,

Hongyu Zhou,

Yue Wang,

Bingbing Liu,

Andreas Geiger,

Yiyi Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Miao_2025_CVPR,
    author    = {Miao, Sheng and Huang, Jiaxin and Bai, Dongfeng and Yan, Xu and Zhou, Hongyu and Wang, Yue and Liu, Bingbing and Geiger, Andreas and Liao, Yiyi},
    title     = {{EVolSplat}: Efficient Volume-based {Gaussian} Splatting for Urban View Synthesis},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {11286--11296},
}
Diff2Flow: Training Flow Matching Models via Diffusion Model Alignment: Johannes Schusterbauer,

Ming Gui,

Frank Fundel,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schusterbauer_2025_CVPR,
    author    = {Schusterbauer, Johannes and Gui, Ming and Fundel, Frank and Ommer, Bj{\"o}rn},
    title     = {{Diff2Flow}: Training Flow Matching Models via Diffusion Model Alignment},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {28347--28357},
}
JanusFlow: Harmonizing Autoregression and Rectified Flow for Unified Multimodal Understanding and Generation: Yiyang Ma,

Xingchao Liu,

Xiaokang Chen,

Wen Liu,

Chengyue Wu,

Zhiyu Wu,

Zizheng Pan,

Zhenda Xie,

Haowei Zhang,

Xingkai Yu,

Liang Zhao,

Yisong Wang,

Jiaying Liu,

Chong Ruan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
    author    = {Ma, Yiyang and Liu, Xingchao and Chen, Xiaokang and Liu, Wen and Wu, Chengyue and Wu, Zhiyu and Pan, Zizheng and Xie, Zhenda and Zhang, Haowei and Yu, Xingkai and Zhao, Liang and Wang, Yisong and Liu, Jiaying and Ruan, Chong},
    title     = {{JanusFlow}: Harmonizing Autoregression and Rectified Flow for Unified Multimodal Understanding and Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {7739--7751},
}
Visual Prompting for One-shot Controllable Video Editing without Inversion: Zhengbo Zhang,

Yuxi Zhou,

Duo Peng,

Joo-Hwee Lim,

Zhigang Tu,

De Wen Soh,

Lin Geng Foo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhengbo and Zhou, Yuxi and Peng, Duo and Lim, Joo-Hwee and Tu, Zhigang and Soh, De Wen and Foo, Lin Geng},
  title     = {Visual Prompting for One-shot Controllable Video Editing without Inversion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7784--7794},
}
PIDSR: Complementary Polarized Image Demosaicing and Super-Resolution: Shuangfan Zhou,

Chu Zhou,

Youwei Lyu,

Heng Guo,

Zhanyu Ma,

Boxin Shi,

Imari Sato; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Shuangfan and Zhou, Chu and Lyu, Youwei and Guo, Heng and Ma, Zhanyu and Shi, Boxin and Sato, Imari},
  title     = {{PIDSR}: Complementary Polarized Image Demosaicing and Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16081--16090},
}
MegaSynth: Scaling Up 3D Scene Reconstruction with Synthesized Data: Hanwen Jiang,

Zexiang Xu,

Desai Xie,

Ziwen Chen,

Haian Jin,

Fujun Luan,

Zhixin Shu,

Kai Zhang,

Sai Bi,

Xin Sun,

Jiuxiang Gu,

Qixing Huang,

Georgios Pavlakos,

Hao Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Hanwen and Xu, Zexiang and Xie, Desai and Chen, Ziwen and Jin, Haian and Luan, Fujun and Shu, Zhixin and Zhang, Kai and Bi, Sai and Sun, Xin and Gu, Jiuxiang and Huang, Qixing and Pavlakos, Georgios and Tan, Hao},
  title     = {{MegaSynth}: Scaling Up {3D} Scene Reconstruction with Synthesized Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16441--16452},
}
Prof. Robot: Differentiable Robot Rendering Without Static and Self-Collisions: Quanyuan Ruan,

Jiabao Lei,

Wenhao Yuan,

Yanglin Zhang,

Dekun Lu,

Guiliang Liu,

Kui Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ruan_2025_CVPR,
  author    = {Ruan, Quanyuan and Lei, Jiabao and Yuan, Wenhao and Zhang, Yanglin and Lu, Dekun and Liu, Guiliang and Jia, Kui},
  title     = {Prof. {Robot}: Differentiable Robot Rendering Without Static and Self-Collisions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22562--22572},
}
AVQACL: A Novel Benchmark for Audio-Visual Question Answering Continual Learning: Kaixuan Wu,

Xinde Li,

Xinling Li,

Chuanfei Hu,

Guoliang Wu; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Kaixuan and Li, Xinde and Li, Xinling and Hu, Chuanfei and Wu, Guoliang},
  title     = {{AVQACL}: A Novel Benchmark for Audio-Visual Question Answering Continual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3252--3261},
}
Flash-Split: 2D Reflection Removal with Flash Cues and Latent Diffusion Separation: Tianfu Wang,

Mingyang Xie,

Haoming Cai,

Sachin Shah,

Christopher A. Metzler; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Tianfu and Xie, Mingyang and Cai, Haoming and Shah, Sachin and Metzler, Christopher A.},
  title     = {{Flash-Split}: {2D} Reflection Removal with Flash Cues and Latent Diffusion Separation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5688--5698},
}
Attention IoU: Examining Biases in CelebA using Attention Maps: Aaron Serianni,

Tyler Zhu,

Olga Russakovsky,

Vikram V. Ramaswamy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Serianni_2025_CVPR,
  author    = {Serianni, Aaron and Zhu, Tyler and Russakovsky, Olga and Ramaswamy, Vikram V.},
  title     = {Attention {IoU}: Examining Biases in {CelebA} using Attention Maps},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4386--4397},
}
HEIE: MLLM-Based Hierarchical Explainable AIGC Image Implausibility Evaluator: Fan Yang,

Ru Zhen,

Jianing Wang,

Yanhao Zhang,

Haoxiang Chen,

Haonan Lu,

Sicheng Zhao,

Guiguang Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Fan and Zhen, Ru and Wang, Jianing and Zhang, Yanhao and Chen, Haoxiang and Lu, Haonan and Zhao, Sicheng and Ding, Guiguang},
  title     = {{HEIE}: {MLLM}-Based Hierarchical Explainable {AIGC} Image Implausibility Evaluator},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3856--3866},
}
Segment Any Motion in Videos: Nan Huang,

Wenzhao Zheng,

Chenfeng Xu,

Kurt Keutzer,

Shanghang Zhang,

Angjoo Kanazawa,

Qianqian Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Nan and Zheng, Wenzhao and Xu, Chenfeng and Keutzer, Kurt and Zhang, Shanghang and Kanazawa, Angjoo and Wang, Qianqian},
  title     = {Segment Any Motion in Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3406--3416},
}
HandOS: 3D Hand Reconstruction in One Stage: Xingyu Chen,

Zhuheng Song,

Xiaoke Jiang,

Yaoqing Hu,

Junzhi Yu,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Xingyu and Song, Zhuheng and Jiang, Xiaoke and Hu, Yaoqing and Yu, Junzhi and Zhang, Lei},
  title     = {{HandOS}: {3D} Hand Reconstruction in One Stage},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17304--17314},
}
Task-aware Cross-modal Feature Refinement Transformer with Large Language Models for Visual Grounding: Wenbo Chen,

Zhen Xu,

Ruotao Xu,

Si Wu,

Hau-San Wong; [pdf]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Wenbo and Xu, Zhen and Xu, Ruotao and Wu, Si and Wong, Hau-San},
  title     = {Task-aware Cross-modal Feature Refinement Transformer with Large Language Models for Visual Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3931--3941},
}
DropGaussian: Structural Regularization for Sparse-view Gaussian Splatting: Hyunwoo Park,

Gun Ryu,

Wonjun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Hyunwoo and Ryu, Gun and Kim, Wonjun},
  title     = {{DropGaussian}: Structural Regularization for Sparse-view {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21600--21609},
}
All-Day Multi-Camera Multi-Target Tracking: Huijie Fan,

Yu Qiao,

Yihao Zhen,

Tinghui Zhao,

Baojie Fan,

Qiang Wang; [pdf]
[bibtex]
@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Huijie and Qiao, Yu and Zhen, Yihao and Zhao, Tinghui and Fan, Baojie and Wang, Qiang},
  title     = {All-Day Multi-Camera Multi-Target Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16892--16901},
}
Blurred LiDAR for Sharper 3D: Robust Handheld 3D Scanning with Diffuse LiDAR and RGB: Nikhil Behari,

Aaron Young,

Siddharth Somasundaram,

Tzofi Klinghoffer,

Akshat Dave,

Ramesh Raskar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Behari_2025_CVPR,
  author    = {Behari, Nikhil and Young, Aaron and Somasundaram, Siddharth and Klinghoffer, Tzofi and Dave, Akshat and Raskar, Ramesh},
  title     = {Blurred {LiDAR} for Sharper {3D}: Robust Handheld {3D} Scanning with Diffuse {LiDAR} and {RGB}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26954--26964},
}
EnergyMoGen: Compositional Human Motion Generation with Energy-Based Diffusion Model in Latent Space: Jianrong Zhang,

Hehe Fan,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jianrong and Fan, Hehe and Yang, Yi},
  title     = {{EnergyMoGen}: Compositional Human Motion Generation with Energy-Based Diffusion Model in Latent Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17592--17602},
}
PatchDEMUX: A Certifiably Robust Framework for Multi-label Classifiers Against Adversarial Patches: Dennis Jacob,

Chong Xiang,

Prateek Mittal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jacob_2025_CVPR,
  author    = {Jacob, Dennis and Xiang, Chong and Mittal, Prateek},
  title     = {{PatchDEMUX}: A Certifiably Robust Framework for Multi-label Classifiers Against Adversarial Patches},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9944--9953},
}
StarVector: Generating Scalable Vector Graphics Code from Images and Text: Juan A. Rodriguez,

Abhay Puri,

Shubham Agarwal,

Issam H. Laradji,

Pau Rodriguez,

Sai Rajeswar,

David Vazquez,

Christopher Pal,

Marco Pedersoli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rodriguez_2025_CVPR,
  author    = {Rodriguez, Juan A. and Puri, Abhay and Agarwal, Shubham and Laradji, Issam H. and Rodriguez, Pau and Rajeswar, Sai and Vazquez, David and Pal, Christopher and Pedersoli, Marco},
  title     = {{StarVector}: Generating Scalable Vector Graphics Code from Images and Text},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16175--16186},
}
Novel View Synthesis with Pixel-Space Diffusion Models: Noam Elata,

Bahjat Kawar,

Yaron Ostrovsky-Berman,

Miriam Farber,

Ron Sokolovsky; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Elata_2025_CVPR,
  author    = {Elata, Noam and Kawar, Bahjat and Ostrovsky-Berman, Yaron and Farber, Miriam and Sokolovsky, Ron},
  title     = {Novel View Synthesis with Pixel-Space Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26756--26766},
}
Object Detection using Event Camera: A MoE Heat Conduction based Detector and A New Benchmark Dataset: Xiao Wang,

Yu Jin,

Wentao Wu,

Wei Zhang,

Lin Zhu,

Bo Jiang,

Yonghong Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xiao and Jin, Yu and Wu, Wentao and Zhang, Wei and Zhu, Lin and Jiang, Bo and Tian, Yonghong},
  title     = {Object Detection using Event Camera: A {MoE} Heat Conduction based Detector and A New Benchmark Dataset},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29321--29330},
}
EgoTextVQA: Towards Egocentric Scene-Text Aware Video Question Answering: Sheng Zhou,

Junbin Xiao,

Qingyun Li,

Yicong Li,

Xun Yang,

Dan Guo,

Meng Wang,

Tat-Seng Chua,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Sheng and Xiao, Junbin and Li, Qingyun and Li, Yicong and Yang, Xun and Guo, Dan and Wang, Meng and Chua, Tat-Seng and Yao, Angela},
  title     = {{EgoTextVQA}: Towards Egocentric Scene-Text Aware Video Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3363--3373},
}
Token Cropr: Faster ViTs for Quite a Few Tasks: Benjamin Bergner,

Christoph Lippert,

Aravindh Mahendran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bergner_2025_CVPR,
  author    = {Bergner, Benjamin and Lippert, Christoph and Mahendran, Aravindh},
  title     = {Token {Cropr}: Faster {ViTs} for Quite a Few Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9740--9750},
}
STCOcc: Sparse Spatial-Temporal Cascade Renovation for 3D Occupancy and Scene Flow Prediction: Zhimin Liao,

Ping Wei,

Shuaijia Chen,

Haoxuan Wang,

Ziyang Ren; [pdf] [arXiv]
[bibtex]
@InProceedings{Liao_2025_CVPR,
  author    = {Liao, Zhimin and Wei, Ping and Chen, Shuaijia and Wang, Haoxuan and Ren, Ziyang},
  title     = {{STCOcc}: Sparse Spatial-Temporal Cascade Renovation for {3D} Occupancy and Scene Flow Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1516--1526},
}
Document Haystacks: Vision-Language Reasoning Over Piles of 1000+ Documents: Jun Chen,

Dannong Xu,

Junjie Fei,

Chun-Mei Feng,

Mohamed Elhoseiny; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jun and Xu, Dannong and Fei, Junjie and Feng, Chun-Mei and Elhoseiny, Mohamed},
  title     = {Document Haystacks: Vision-Language Reasoning Over Piles of 1000+ Documents},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24817--24826},
}
Rethinking Few-Shot Adaptation of Vision-Language Models in Two Stages: Matteo Farina,

Massimiliano Mancini,

Giovanni Iacca,

Elisa Ricci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Farina_2025_CVPR,
  author    = {Farina, Matteo and Mancini, Massimiliano and Iacca, Giovanni and Ricci, Elisa},
  title     = {Rethinking Few-Shot Adaptation of Vision-Language Models in Two Stages},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29989--29998},
}
Resilient Sensor Fusion Under Adverse Sensor Failures via Multi-Modal Expert Fusion: Konyul Park,

Yecheol Kim,

Daehun Kim,

Jun Won Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Konyul and Kim, Yecheol and Kim, Daehun and Choi, Jun Won},
  title     = {Resilient Sensor Fusion Under Adverse Sensor Failures via Multi-Modal Expert Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6720--6729},
}
TAGA: Self-supervised Learning for Template-free Animatable Gaussian Articulated Model: Zhichao Zhai,

Guikun Chen,

Wenguan Wang,

Dong Zheng,

Jun Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Zhai_2025_CVPR,
  author    = {Zhai, Zhichao and Chen, Guikun and Wang, Wenguan and Zheng, Dong and Xiao, Jun},
  title     = {{TAGA}: Self-supervised Learning for Template-free Animatable {Gaussian} Articulated Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21159--21169},
}
MambaVO: Deep Visual Odometry Based on Sequential Matching Refinement and Training Smoothing: Shuo Wang,

Wanting Li,

Yongcai Wang,

Zhaoxin Fan,

Zhe Huang,

Xudong Cai,

Jian Zhao,

Deying Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shuo and Li, Wanting and Wang, Yongcai and Fan, Zhaoxin and Huang, Zhe and Cai, Xudong and Zhao, Jian and Li, Deying},
  title     = {{MambaVO}: Deep Visual Odometry Based on Sequential Matching Refinement and Training Smoothing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1252--1262},
}
Explaining in Diffusion: Explaining a Classifier with Diffusion Semantics: Tahira Kazimi,

Ritika Allada,

Pinar Yanardag; [pdf] [supp]
[bibtex]
@InProceedings{Kazimi_2025_CVPR,
  author    = {Kazimi, Tahira and Allada, Ritika and Yanardag, Pinar},
  title     = {Explaining in Diffusion: Explaining a Classifier with Diffusion Semantics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14799--14809},
}
Horizon-GS: Unified 3D Gaussian Splatting for Large-Scale Aerial-to-Ground Scenes: Lihan Jiang,

Kerui Ren,

Mulin Yu,

Linning Xu,

Junting Dong,

Tao Lu,

Feng Zhao,

Dahua Lin,

Bo Dai; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Lihan and Ren, Kerui and Yu, Mulin and Xu, Linning and Dong, Junting and Lu, Tao and Zhao, Feng and Lin, Dahua and Dai, Bo},
  title     = {{Horizon-GS}: Unified {3D} {Gaussian} Splatting for Large-Scale Aerial-to-Ground Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26789--26799},
}
Attention Distillation: A Unified Approach to Visual Characteristics Transfer: Yang Zhou,

Xu Gao,

Zichong Chen,

Hui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Yang and Gao, Xu and Chen, Zichong and Huang, Hui},
  title     = {Attention Distillation: A Unified Approach to Visual Characteristics Transfer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18270--18280},
}
From Words to Structured Visuals: A Benchmark and Framework for Text-to-Diagram Generation and Editing: Jingxuan Wei,

Cheng Tan,

Qi Chen,

Gaowei Wu,

Siyuan Li,

Zhangyang Gao,

Linzhuang Sun,

Bihui Yu,

Ruifeng Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Jingxuan and Tan, Cheng and Chen, Qi and Wu, Gaowei and Li, Siyuan and Gao, Zhangyang and Sun, Linzhuang and Yu, Bihui and Guo, Ruifeng},
  title     = {From Words to Structured Visuals: A Benchmark and Framework for Text-to-Diagram Generation and Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13315--13325},
}
LotusFilter: Fast Diverse Nearest Neighbor Search via a Learned Cutoff Table: Yusuke Matsui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Matsui_2025_CVPR,
  author    = {Matsui, Yusuke},
  title     = {{LotusFilter}: Fast Diverse Nearest Neighbor Search via a Learned Cutoff Table},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30430--30439},
}
DreamRelation: Bridging Customization and Relation Generation: Qingyu Shi,

Lu Qi,

Jianzong Wu,

Jinbin Bai,

Jingbo Wang,

Yunhai Tong,

Xiangtai Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Qingyu and Qi, Lu and Wu, Jianzong and Bai, Jinbin and Wang, Jingbo and Tong, Yunhai and Li, Xiangtai},
  title     = {{DreamRelation}: Bridging Customization and Relation Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15723--15732},
}
IndoorGS: Geometric Cues Guided Gaussian Splatting for Indoor Scene Reconstruction: Cong Ruan,

Yuesong Wang,

Tao Guan,

Bin Zhang,

Lili Ju; [pdf] [supp]
[bibtex]
@InProceedings{Ruan_2025_CVPR,
  author    = {Ruan, Cong and Wang, Yuesong and Guan, Tao and Zhang, Bin and Ju, Lili},
  title     = {{IndoorGS}: Geometric Cues Guided {Gaussian} Splatting for Indoor Scene Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {844--853},
}
Point-Cache: Test-time Dynamic and Hierarchical Cache for Robust and Generalizable Point Cloud Analysis: Hongyu Sun,

Qiuhong Ke,

Ming Cheng,

Yongcai Wang,

Deying Li,

Chenhui Gou,

Jianfei Cai; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Hongyu and Ke, Qiuhong and Cheng, Ming and Wang, Yongcai and Li, Deying and Gou, Chenhui and Cai, Jianfei},
  title     = {{Point-Cache}: Test-time Dynamic and Hierarchical Cache for Robust and Generalizable Point Cloud Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1263--1275},
}
Think Small, Act Big: Primitive Prompt Learning for Lifelong Robot Manipulation: Yuanqi Yao,

Siao Liu,

Haoming Song,

Delin Qu,

Qizhi Chen,

Yan Ding,

Bin Zhao,

Zhigang Wang,

Xuelong Li,

Dong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2025_CVPR,
  author    = {Yao, Yuanqi and Liu, Siao and Song, Haoming and Qu, Delin and Chen, Qizhi and Ding, Yan and Zhao, Bin and Wang, Zhigang and Li, Xuelong and Wang, Dong},
  title     = {Think Small, Act Big: Primitive Prompt Learning for Lifelong Robot Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22573--22583},
}
Stop Walking in Circles! Bailing Out Early in Projected Gradient Descent: Philip Doldo,

Derek Everett,

Amol Khanna,

Andre T Nguyen,

Edward Raff; [pdf] [arXiv]
[bibtex]
@InProceedings{Doldo_2025_CVPR,
  author    = {Doldo, Philip and Everett, Derek and Khanna, Amol and Nguyen, Andre T and Raff, Edward},
  title     = {Stop Walking in Circles! Bailing Out Early in Projected Gradient Descent},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6373--6382},
}
MoManipVLA: Transferring Vision-language-action Models for General Mobile Manipulation: Zhenyu Wu,

Yuheng Zhou,

Xiuwei Xu,

Ziwei Wang,

Haibin Yan; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Zhenyu and Zhou, Yuheng and Xu, Xiuwei and Wang, Ziwei and Yan, Haibin},
  title     = {{MoManipVLA}: Transferring Vision-language-action Models for General Mobile Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1714--1723},
}
SoMA: Singular Value Decomposed Minor Components Adaptation for Domain Generalizable Representation Learning: Seokju Yun,

Seunghye Chae,

Dongheon Lee,

Youngmin Ro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yun_2025_CVPR,
  author    = {Yun, Seokju and Chae, Seunghye and Lee, Dongheon and Ro, Youngmin},
  title     = {{SoMA}: Singular Value Decomposed Minor Components Adaptation for Domain Generalizable Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25602--25612},
}
Depth-Guided Bundle Sampling for Efficient Generalizable Neural Radiance Field Reconstruction: Li Fang,

Hao Zhu,

Longlong Chen,

Fei Hu,

Long Ye,

Zhan Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Li and Zhu, Hao and Chen, Longlong and Hu, Fei and Ye, Long and Ma, Zhan},
  title     = {Depth-Guided Bundle Sampling for Efficient Generalizable Neural Radiance Field Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11217--11226},
}
TinyFusion: Diffusion Transformers Learned Shallow: Gongfan Fang,

Kunjun Li,

Xinyin Ma,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Gongfan and Li, Kunjun and Ma, Xinyin and Wang, Xinchao},
  title     = {{TinyFusion}: Diffusion Transformers Learned Shallow},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18144--18154},
}
Ref-GS: Directional Factorization for 2D Gaussian Splatting: Youjia Zhang,

Anpei Chen,

Yumin Wan,

Zikai Song,

Junqing Yu,

Yawei Luo,

Wei Yang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Youjia and Chen, Anpei and Wan, Yumin and Song, Zikai and Yu, Junqing and Luo, Yawei and Yang, Wei},
  title     = {{Ref-GS}: Directional Factorization for {2D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26483--26492},
}
SVG-IR: Spatially-Varying Gaussian Splatting for Inverse Rendering: Hanxiao Sun,

Yupeng Gao,

Jin Xie,

Jian Yang,

Beibei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Hanxiao and Gao, Yupeng and Xie, Jin and Yang, Jian and Wang, Beibei},
  title     = {{SVG-IR}: Spatially-Varying {Gaussian} Splatting for Inverse Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16143--16152},
}
Stable-SCore: A Stable Registration-based Framework for 3D Shape Correspondence: Haolin Liu,

Xiaohang Zhan,

Zizheng Yan,

Zhongjin Luo,

Yuxin Wen,

Xiaoguang Han; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Haolin and Zhan, Xiaohang and Yan, Zizheng and Luo, Zhongjin and Wen, Yuxin and Han, Xiaoguang},
  title     = {{Stable-SCore}: A Stable Registration-based Framework for {3D} Shape Correspondence},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {917--928},
}
Beyond Single-Modal Boundary: Cross-Modal Anomaly Detection through Visual Prototype and Harmonization: Kai Mao,

Ping Wei,

Yiyang Lian,

Yangyang Wang,

Nanning Zheng; [pdf]
[bibtex]
@InProceedings{Mao_2025_CVPR,
  author    = {Mao, Kai and Wei, Ping and Lian, Yiyang and Wang, Yangyang and Zheng, Nanning},
  title     = {Beyond Single-Modal Boundary: Cross-Modal Anomaly Detection through Visual Prototype and Harmonization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9964--9973},
}
VDocRAG: Retrieval-Augmented Generation over Visually-Rich Documents: Ryota Tanaka,

Taichi Iki,

Taku Hasegawa,

Kyosuke Nishida,

Kuniko Saito,

Jun Suzuki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tanaka_2025_CVPR,
  author    = {Tanaka, Ryota and Iki, Taichi and Hasegawa, Taku and Nishida, Kyosuke and Saito, Kuniko and Suzuki, Jun},
  title     = {{VDocRAG}: Retrieval-Augmented Generation over Visually-Rich Documents},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24827--24837},
}
Align-KD: Distilling Cross-Modal Alignment Knowledge for Mobile Vision-Language Large Model Enhancement: Qianhan Feng,

Wenshuo Li,

Tong Lin,

Xinghao Chen; [pdf]
[bibtex]
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Qianhan and Li, Wenshuo and Lin, Tong and Chen, Xinghao},
  title     = {{Align-KD}: Distilling Cross-Modal Alignment Knowledge for Mobile Vision-Language Large Model Enhancement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4178--4188},
}
Pose Priors from Language Models: Sanjay Subramanian,

Evonne Ng,

Lea Müller,

Dan Klein,

Shiry Ginosar,

Trevor Darrell; [pdf] [supp]
[bibtex]
@InProceedings{Subramanian_2025_CVPR,
  author    = {Subramanian, Sanjay and Ng, Evonne and M{\"u}ller, Lea and Klein, Dan and Ginosar, Shiry and Darrell, Trevor},
  title     = {Pose Priors from Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7125--7135},
}
Concept Lancet: Image Editing with Compositional Representation Transplant: Jinqi Luo,

Tianjiao Ding,

Kwan Ho Ryan Chan,

Hancheng Min,

Chris Callison-Burch,

Rene Vidal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Jinqi and Ding, Tianjiao and Chan, Kwan Ho Ryan and Min, Hancheng and Callison-Burch, Chris and Vidal, Rene},
  title     = {Concept {Lancet}: Image Editing with Compositional Representation Transplant},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28502--28512},
}
Scaling Mesh Generation via Compressive Tokenization: Haohan Weng,

Zibo Zhao,

Biwen Lei,

Xianghui Yang,

Jian Liu,

Zeqiang Lai,

Zhuo Chen,

Yuhong Liu,

Jie Jiang,

Chunchao Guo,

Tong Zhang,

Shenghua Gao,

C.L. Philip Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Weng_2025_CVPR,
  author    = {Weng, Haohan and Zhao, Zibo and Lei, Biwen and Yang, Xianghui and Liu, Jian and Lai, Zeqiang and Chen, Zhuo and Liu, Yuhong and Jiang, Jie and Guo, Chunchao and Zhang, Tong and Gao, Shenghua and Chen, C. L. Philip},
  title     = {Scaling Mesh Generation via Compressive Tokenization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11093--11103},
}
Generative Densification: Learning to Densify Gaussians for High-Fidelity Generalizable 3D Reconstruction: Seungtae Nam,

Xiangyu Sun,

Gyeongjin Kang,

Younggeun Lee,

Seungjun Oh,

Eunbyung Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2025_CVPR,
  author    = {Nam, Seungtae and Sun, Xiangyu and Kang, Gyeongjin and Lee, Younggeun and Oh, Seungjun and Park, Eunbyung},
  title     = {Generative Densification: Learning to Densify {Gaussians} for High-Fidelity Generalizable {3D} Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26683--26693},
}
LogoSP: Local-global Grouping of Superpoints for Unsupervised Semantic Segmentation of 3D Point Clouds: Zihui Zhang,

Weisheng Dai,

Hongtao Wen,

Bo Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zihui and Dai, Weisheng and Wen, Hongtao and Yang, Bo},
  title     = {{LogoSP}: Local-global Grouping of Superpoints for Unsupervised Semantic Segmentation of {3D} Point Clouds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1374--1384},
}
Exploring Intrinsic Normal Prototypes within a Single Image for Universal Anomaly Detection: Wei Luo,

Yunkang Cao,

Haiming Yao,

Xiaotian Zhang,

Jianan Lou,

Yuqi Cheng,

Weiming Shen,

Wenyong Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Wei and Cao, Yunkang and Yao, Haiming and Zhang, Xiaotian and Lou, Jianan and Cheng, Yuqi and Shen, Weiming and Yu, Wenyong},
  title     = {Exploring Intrinsic Normal Prototypes within a Single Image for Universal Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9974--9983},
}
Augmenting Perceptual Super-Resolution via Image Quality Predictors: Fengjia Zhang,

Samrudhdhi B. Rangrej,

Tristan Aumentado-Armstrong,

Afsaneh Fazly,

Alex Levinshtein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Fengjia and Rangrej, Samrudhdhi B. and Aumentado-Armstrong, Tristan and Fazly, Afsaneh and Levinshtein, Alex},
  title     = {Augmenting Perceptual Super-Resolution via Image Quality Predictors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2311--2322},
}
TurboFill: Adapting Few-step Text-to-image Model for Fast Image Inpainting: Liangbin Xie,

Daniil Pakhomov,

Zhonghao Wang,

Zongze Wu,

Ziyan Chen,

Yuqian Zhou,

Haitian Zheng,

Zhifei Zhang,

Zhe Lin,

Jiantao Zhou,

Chao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Liangbin and Pakhomov, Daniil and Wang, Zhonghao and Wu, Zongze and Chen, Ziyan and Zhou, Yuqian and Zheng, Haitian and Zhang, Zhifei and Lin, Zhe and Zhou, Jiantao and Dong, Chao},
  title     = {{TurboFill}: Adapting Few-step Text-to-image Model for Fast Image Inpainting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7613--7622},
}
Stochastic Human Motion Prediction with Memory of Action Transition and Action Characteristic: Jianwei Tang,

Hong Yang,

Tengyue Chen,

Jian-Fang Hu; [pdf] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Jianwei and Yang, Hong and Chen, Tengyue and Hu, Jian-Fang},
  title     = {Stochastic Human Motion Prediction with Memory of Action Transition and Action Characteristic},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1883--1893},
}
Video-MME: The First-Ever Comprehensive Evaluation Benchmark of Multi-modal LLMs in Video Analysis: Chaoyou Fu,

Yuhan Dai,

Yongdong Luo,

Lei Li,

Shuhuai Ren,

Renrui Zhang,

Zihan Wang,

Chenyu Zhou,

Yunhang Shen,

Mengdan Zhang,

Peixian Chen,

Yanwei Li,

Shaohui Lin,

Sirui Zhao,

Ke Li,

Tong Xu,

Xiawu Zheng,

Enhong Chen,

Caifeng Shan,

Ran He,

Xing Sun; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Chaoyou and Dai, Yuhan and Luo, Yongdong and Li, Lei and Ren, Shuhuai and Zhang, Renrui and Wang, Zihan and Zhou, Chenyu and Shen, Yunhang and Zhang, Mengdan and Chen, Peixian and Li, Yanwei and Lin, Shaohui and Zhao, Sirui and Li, Ke and Xu, Tong and Zheng, Xiawu and Chen, Enhong and Shan, Caifeng and He, Ran and Sun, Xing},
  title     = {{Video-MME}: The First-Ever Comprehensive Evaluation Benchmark of Multi-modal {LLMs} in Video Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24108--24118},
}
Perception Tokens Enhance Visual Reasoning in Multimodal Language Models: Mahtab Bigverdi,

Zelun Luo,

Cheng-Yu Hsieh,

Ethan Shen,

Dongping Chen,

Linda G. Shapiro,

Ranjay Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bigverdi_2025_CVPR,
  author    = {Bigverdi, Mahtab and Luo, Zelun and Hsieh, Cheng-Yu and Shen, Ethan and Chen, Dongping and Shapiro, Linda G. and Krishna, Ranjay},
  title     = {Perception Tokens Enhance Visual Reasoning in Multimodal Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3836--3845},
}
Are Images Indistinguishable to Humans Also Indistinguishable to Classifiers?: Zebin You,

Xinyu Zhang,

Hanzhong Guo,

Jingdong Wang,

Chongxuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2025_CVPR,
  author    = {You, Zebin and Zhang, Xinyu and Guo, Hanzhong and Wang, Jingdong and Li, Chongxuan},
  title     = {Are Images Indistinguishable to Humans Also Indistinguishable to Classifiers?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28790--28800},
}
X-Dyna: Expressive Dynamic Human Image Animation: Di Chang,

Hongyi Xu,

You Xie,

Yipeng Gao,

Zhengfei Kuang,

Shengqu Cai,

Chenxu Zhang,

Guoxian Song,

Chao Wang,

Yichun Shi,

Zeyuan Chen,

Shijie Zhou,

Linjie Luo,

Gordon Wetzstein,

Mohammad Soleymani; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2025_CVPR,
  author    = {Chang, Di and Xu, Hongyi and Xie, You and Gao, Yipeng and Kuang, Zhengfei and Cai, Shengqu and Zhang, Chenxu and Song, Guoxian and Wang, Chao and Shi, Yichun and Chen, Zeyuan and Zhou, Shijie and Luo, Linjie and Wetzstein, Gordon and Soleymani, Mohammad},
  title     = {{X-Dyna}: Expressive Dynamic Human Image Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5499--5509},
}
Understanding Multi-layered Transmission Matrices: Anat Levin,

Marina Alterman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Levin_2025_CVPR,
  author    = {Levin, Anat and Alterman, Marina},
  title     = {Understanding Multi-layered Transmission Matrices},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23164--23173},
}
GS-DiT: Advancing Video Generation with Dynamic 3D Gaussian Fields through Efficient Dense 3D Point Tracking: Weikang Bian,

Zhaoyang Huang,

Xiaoyu Shi,

Yijin Li,

Fu-Yun Wang,

Hongsheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Bian_2025_CVPR,
  author    = {Bian, Weikang and Huang, Zhaoyang and Shi, Xiaoyu and Li, Yijin and Wang, Fu-Yun and Li, Hongsheng},
  title     = {{GS-DiT}: Advancing Video Generation with Dynamic {3D} {Gaussian} Fields through Efficient Dense {3D} Point Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21717--21727},
}
AnyMoLe: Any Character Motion In-betweening Leveraging Video Diffusion Models: Kwan Yun,

Seokhyeon Hong,

Chaelin Kim,

Junyong Noh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yun_2025_CVPR,
  author    = {Yun, Kwan and Hong, Seokhyeon and Kim, Chaelin and Noh, Junyong},
  title     = {{AnyMoLe}: Any Character Motion In-betweening Leveraging Video Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27838--27848},
}
Towards Optimizing Large-Scale Multi-Graph Matching in Bioimaging: Max Kahl,

Sebastian Stricker,

Lisa Hutschenreiter,

Florian Bernard,

Carsten Rother,

Bogdan Savchynskyy; [pdf] [supp]
[bibtex]
@InProceedings{Kahl_2025_CVPR,
  author    = {Kahl, Max and Stricker, Sebastian and Hutschenreiter, Lisa and Bernard, Florian and Rother, Carsten and Savchynskyy, Bogdan},
  title     = {Towards Optimizing Large-Scale Multi-Graph Matching in Bioimaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11569--11578},
}
Towards Effective and Sparse Adversarial Attack on Spiking Neural Networks via Breaking Invisible Surrogate Gradients: Li Lun,

Kunyu Feng,

Qinglong Ni,

Ling Liang,

Yuan Wang,

Ying Li,

Dunshan Yu,

Xiaoxin Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lun_2025_CVPR,
  author    = {Lun, Li and Feng, Kunyu and Ni, Qinglong and Liang, Ling and Wang, Yuan and Li, Ying and Yu, Dunshan and Cui, Xiaoxin},
  title     = {Towards Effective and Sparse Adversarial Attack on Spiking Neural Networks via Breaking Invisible Surrogate Gradients},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3540--3551},
}
Towards Understanding and Quantifying Uncertainty for Text-to-Image Generation: Gianni Franchi,

Nacim Belkhir,

Dat Nguyen Trong,

Guoxuan Xia,

Andrea Pilzer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Franchi_2025_CVPR,
  author    = {Franchi, Gianni and Belkhir, Nacim and Trong, Dat Nguyen and Xia, Guoxuan and Pilzer, Andrea},
  title     = {Towards Understanding and Quantifying Uncertainty for Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8062--8072},
}
PS-Diffusion: Photorealistic Subject-Driven Image Editing with Disentangled Control and Attention: Weicheng Wang,

Guoli Jia,

Zhongqi Zhang,

Liang Lin,

Jufeng Yang; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Weicheng and Jia, Guoli and Zhang, Zhongqi and Lin, Liang and Yang, Jufeng},
  title     = {{PS-Diffusion}: Photorealistic Subject-Driven Image Editing with Disentangled Control and Attention},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18302--18312},
}
Exploring Visual Vulnerabilities via Multi-Loss Adversarial Search for Jailbreaking Vision-Language Models: Shuyang Hao,

Bryan Hooi,

Jun Liu,

Kai-Wei Chang,

Zi Huang,

Yujun Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hao_2025_CVPR,
  author    = {Hao, Shuyang and Hooi, Bryan and Liu, Jun and Chang, Kai-Wei and Huang, Zi and Cai, Yujun},
  title     = {Exploring Visual Vulnerabilities via Multi-Loss Adversarial Search for Jailbreaking Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19890--19899},
}
LLaVA-ST: A Multimodal Large Language Model for Fine-Grained Spatial-Temporal Understanding: Hongyu Li,

Jinyu Chen,

Ziyu Wei,

Shaofei Huang,

Tianrui Hui,

Jialin Gao,

Xiaoming Wei,

Si Liu; [pdf]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Hongyu and Chen, Jinyu and Wei, Ziyu and Huang, Shaofei and Hui, Tianrui and Gao, Jialin and Wei, Xiaoming and Liu, Si},
  title     = {{LLaVA-ST}: A Multimodal Large Language Model for Fine-Grained Spatial-Temporal Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8592--8603},
}
Insight-V: Exploring Long-Chain Visual Reasoning with Multimodal Large Language Models: Yuhao Dong,

Zuyan Liu,

Hai-Long Sun,

Jingkang Yang,

Winston Hu,

Yongming Rao,

Ziwei Liu; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Yuhao and Liu, Zuyan and Sun, Hai-Long and Yang, Jingkang and Hu, Winston and Rao, Yongming and Liu, Ziwei},
  title     = {{Insight-V}: Exploring Long-Chain Visual Reasoning with Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9062--9072},
}
MaIR: A Locality- and Continuity-Preserving Mamba for Image Restoration: Boyun Li,

Haiyu Zhao,

Wenxin Wang,

Peng Hu,

Yuanbiao Gou,

Xi Peng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Boyun and Zhao, Haiyu and Wang, Wenxin and Hu, Peng and Gou, Yuanbiao and Peng, Xi},
  title     = {{MaIR}: A Locality- and Continuity-Preserving {Mamba} for Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7491--7501},
}
Few-shot Implicit Function Generation via Equivariance: Suizhi Huang,

Xingyi Yang,

Hongtao Lu,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Suizhi and Yang, Xingyi and Lu, Hongtao and Wang, Xinchao},
  title     = {Few-shot Implicit Function Generation via Equivariance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16262--16272},
}
RSAR: Restricted State Angle Resolver and Rotated SAR Benchmark: Xin Zhang,

Xue Yang,

Yuxuan Li,

Jian Yang,

Ming-Ming Cheng,

Xiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xin and Yang, Xue and Li, Yuxuan and Yang, Jian and Cheng, Ming-Ming and Li, Xiang},
  title     = {{RSAR}: Restricted State Angle Resolver and Rotated {SAR} Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7416--7426},
}
Dual Energy-Based Model with Open-World Uncertainty Estimation for Out-of-distribution Detection: Qi Chen,

Hu Ding; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Qi and Ding, Hu},
  title     = {Dual Energy-Based Model with Open-World Uncertainty Estimation for Out-of-distribution Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25728--25737},
}
DTGBrepGen: A Novel B-rep Generative Model through Decoupling Topology and Geometry: Jing Li,

Yihang Fu,

Falai Chen; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jing and Fu, Yihang and Chen, Falai},
  title     = {{DTGBrepGen}: A Novel {B-rep} Generative Model through Decoupling Topology and Geometry},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21438--21447},
}
Continuous Space-Time Video Resampling with Invertible Motion Steganography: Yuantong Zhang,

Zhenzhong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yuantong and Chen, Zhenzhong},
  title     = {Continuous Space-Time Video Resampling with Invertible Motion Steganography},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2116--2126},
}
Schedule On the Fly: Diffusion Time Prediction for Faster and Better Image Generation: Zilyu Ye,

Zhiyang Chen,

Tiancheng Li,

Zemin Huang,

Weijian Luo,

Guo-Jun Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Zilyu and Chen, Zhiyang and Li, Tiancheng and Huang, Zemin and Luo, Weijian and Qi, Guo-Jun},
  title     = {Schedule On the Fly: Diffusion Time Prediction for Faster and Better Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23412--23422},
}
Learning Audio-guided Video Representation with Gated Attention for Video-Text Retrieval: Boseung Jeong,

Jicheol Park,

Sungyeon Kim,

Suha Kwak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_CVPR,
  author    = {Jeong, Boseung and Park, Jicheol and Kim, Sungyeon and Kwak, Suha},
  title     = {Learning Audio-guided Video Representation with Gated Attention for Video-Text Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26202--26211},
}
ProtoDepth: Unsupervised Continual Depth Completion with Prototypes: Patrick Rim,

Hyoungseob Park,

S. Gangopadhyay,

Ziyao Zeng,

Younjoon Chung,

Alex Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rim_2025_CVPR,
  author    = {Rim, Patrick and Park, Hyoungseob and Gangopadhyay, S. and Zeng, Ziyao and Chung, Younjoon and Wong, Alex},
  title     = {{ProtoDepth}: Unsupervised Continual Depth Completion with Prototypes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6304--6316},
}
vesselFM: A Foundation Model for Universal 3D Blood Vessel Segmentation: Bastian Wittmann,

Yannick Wattenberg,

Tamaz Amiranashvili,

Suprosanna Shit,

Bjoern Menze; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wittmann_2025_CVPR,
  author    = {Wittmann, Bastian and Wattenberg, Yannick and Amiranashvili, Tamaz and Shit, Suprosanna and Menze, Bjoern},
  title     = {{vesselFM}: A Foundation Model for Universal {3D} Blood Vessel Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20874--20884},
}
TASTE-Rob: Advancing Video Generation of Task-Oriented Hand-Object Interaction for Generalizable Robotic Manipulation: Hongxiang Zhao,

Xingchen Liu,

Mutian Xu,

Yiming Hao,

Weikai Chen,

Xiaoguang Han; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Hongxiang and Liu, Xingchen and Xu, Mutian and Hao, Yiming and Chen, Weikai and Han, Xiaoguang},
  title     = {{TASTE-Rob}: Advancing Video Generation of Task-Oriented Hand-Object Interaction for Generalizable Robotic Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27683--27693},
}
NoT: Federated Unlearning via Weight Negation: Yasser H. Khalil,

Leo Brunswic,

Soufiane Lamghari,

Xu Li,

Mahdi Beitollahi,

Xi Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Khalil_2025_CVPR,
  author    = {Khalil, Yasser H. and Brunswic, Leo and Lamghari, Soufiane and Li, Xu and Beitollahi, Mahdi and Chen, Xi},
  title     = {{NoT}: Federated Unlearning via Weight Negation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25759--25769},
}
ParaHome: Parameterizing Everyday Home Activities Towards 3D Generative Modeling of Human-Object Interactions: Jeonghwan Kim,

Jisoo Kim,

Jeonghyeon Na,

Hanbyul Joo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Jeonghwan and Kim, Jisoo and Na, Jeonghyeon and Joo, Hanbyul},
  title     = {{ParaHome}: Parameterizing Everyday Home Activities Towards {3D} Generative Modeling of Human-Object Interactions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1816--1828},
}
Adapting to the Unknown: Training-Free Audio-Visual Event Perception with Dynamic Thresholds: Eitan Shaar,

Ariel Shaulov,

Gal Chechik,

Lior Wolf; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shaar_2025_CVPR,
  author    = {Shaar, Eitan and Shaulov, Ariel and Chechik, Gal and Wolf, Lior},
  title     = {Adapting to the Unknown: Training-Free Audio-Visual Event Perception with Dynamic Thresholds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3142--3151},
}
OpenHumanVid: A Large-Scale High-Quality Dataset for Enhancing Human-Centric Video Generation: Hui Li,

Mingwang Xu,

Yun Zhan,

Shan Mu,

Jiaye Li,

Kaihui Cheng,

Yuxuan Chen,

Tan Chen,

Mao Ye,

Jingdong Wang,

Siyu Zhu; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Hui and Xu, Mingwang and Zhan, Yun and Mu, Shan and Li, Jiaye and Cheng, Kaihui and Chen, Yuxuan and Chen, Tan and Ye, Mao and Wang, Jingdong and Zhu, Siyu},
  title     = {{OpenHumanVid}: A Large-Scale High-Quality Dataset for Enhancing Human-Centric Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7752--7762},
}
Classifier-Free Guidance Inside the Attraction Basin May Cause Memorization: Anubhav Jain,

Yuya Kobayashi,

Takashi Shibuya,

Yuhta Takida,

Nasir Memon,

Julian Togelius,

Yuki Mitsufuji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jain_2025_CVPR,
  author    = {Jain, Anubhav and Kobayashi, Yuya and Shibuya, Takashi and Takida, Yuhta and Memon, Nasir and Togelius, Julian and Mitsufuji, Yuki},
  title     = {Classifier-Free Guidance Inside the Attraction Basin May Cause Memorization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12871--12879},
}
Track Any Anomalous Object: A Granular Video Anomaly Detection Pipeline: Yuzhi Huang,

Chenxin Li,

Haitao Zhang,

Zixu Lin,

Yunlong Lin,

Hengyu Liu,

Wuyang Li,

Xinyu Liu,

Jiechao Gao,

Yue Huang,

Xinghao Ding,

Yixuan Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Yuzhi and Li, Chenxin and Zhang, Haitao and Lin, Zixu and Lin, Yunlong and Liu, Hengyu and Li, Wuyang and Liu, Xinyu and Gao, Jiechao and Huang, Yue and Ding, Xinghao and Yuan, Yixuan},
  title     = {Track Any Anomalous Object: A Granular Video Anomaly Detection Pipeline},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8689--8699},
}
RANGE: Retrieval Augmented Neural Fields for Multi-Resolution Geo-Embeddings: Aayush Dhakal,

Srikumar Sastry,

Subash Khanal,

Adeel Ahmad,

Eric Xing,

Nathan Jacobs; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dhakal_2025_CVPR,
  author    = {Dhakal, Aayush and Sastry, Srikumar and Khanal, Subash and Ahmad, Adeel and Xing, Eric and Jacobs, Nathan},
  title     = {{RANGE}: Retrieval Augmented Neural Fields for Multi-Resolution Geo-Embeddings},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24680--24689},
}
Magma: A Foundation Model for Multimodal AI Agents: Jianwei Yang,

Reuben Tan,

Qianhui Wu,

Ruijie Zheng,

Baolin Peng,

Yongyuan Liang,

Yu Gu,

Mu Cai,

Seonghyeon Ye,

Joel Jang,

Yuquan Deng,

Jianfeng Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jianwei and Tan, Reuben and Wu, Qianhui and Zheng, Ruijie and Peng, Baolin and Liang, Yongyuan and Gu, Yu and Cai, Mu and Ye, Seonghyeon and Jang, Joel and Deng, Yuquan and Gao, Jianfeng},
  title     = {Magma: A Foundation Model for Multimodal {AI} Agents},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14203--14214},
}
SimMotionEdit: Text-Based Human Motion Editing with Motion Similarity Prediction: Zhengyuan Li,

Kai Cheng,

Anindita Ghosh,

Uttaran Bhattacharya,

Liangyan Gui,

Aniket Bera; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zhengyuan and Cheng, Kai and Ghosh, Anindita and Bhattacharya, Uttaran and Gui, Liangyan and Bera, Aniket},
  title     = {{SimMotionEdit}: Text-Based Human Motion Editing with Motion Similarity Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27827--27837},
}
Object-aware Sound Source Localization via Audio-Visual Scene Understanding: Sung Jin Um,

Dongjin Kim,

Sangmin Lee,

Jung Uk Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Um_2025_CVPR,
  author    = {Um, Sung Jin and Kim, Dongjin and Lee, Sangmin and Kim, Jung Uk},
  title     = {Object-aware Sound Source Localization via Audio-Visual Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8342--8351},
}
Volume Tells: Dual Cycle-Consistent Diffusion for 3D Fluorescence Microscopy De-noising and Super-Resolution: Zelin Li,

Chenwei Wang,

Zhaoke Huang,

Yiming Ma,

Cunming Zhao,

Zhongying Zhao,

Hong Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zelin and Wang, Chenwei and Huang, Zhaoke and Ma, Yiming and Zhao, Cunming and Zhao, Zhongying and Yan, Hong},
  title     = {Volume Tells: Dual Cycle-Consistent Diffusion for {3D} Fluorescence Microscopy De-noising and Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16091--16100},
}
SerialGen: Personalized Image Generation by First Standardization Then Personalization: Cong Xie,

Han Zou,

Ruiqi Yu,

Yan Zhang,

Zhenpeng Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Cong and Zou, Han and Yu, Ruiqi and Zhang, Yan and Zhan, Zhenpeng},
  title     = {{SerialGen}: Personalized Image Generation by First Standardization Then Personalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2847--2856},
}
From Head to Tail: Efficient Black-box Model Inversion Attack via Long-tailed Learning: Ziang Li,

Hongguang Zhang,

Juan Wang,

Meihui Chen,

Hongxin Hu,

Wenzhe Yi,

Xiaoyang Xu,

Mengda Yang,

Chenjun Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Ziang and Zhang, Hongguang and Wang, Juan and Chen, Meihui and Hu, Hongxin and Yi, Wenzhe and Xu, Xiaoyang and Yang, Mengda and Ma, Chenjun},
  title     = {From Head to Tail: Efficient Black-box Model Inversion Attack via Long-tailed Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29288--29298},
}
Augmented Deep Contexts for Spatially Embedded Video Coding: Yifan Bian,

Chuanbo Tang,

Li Li,

Dong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bian_2025_CVPR,
  author    = {Bian, Yifan and Tang, Chuanbo and Li, Li and Liu, Dong},
  title     = {Augmented Deep Contexts for Spatially Embedded Video Coding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2094--2104},
}
SIDA: Social Media Image Deepfake Detection, Localization and Explanation with Large Multimodal Model: Zhenglin Huang,

Jinwei Hu,

Xiangtai Li,

Yiwei He,

Xingyu Zhao,

Bei Peng,

Baoyuan Wu,

Xiaowei Huang,

Guangliang Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zhenglin and Hu, Jinwei and Li, Xiangtai and He, Yiwei and Zhao, Xingyu and Peng, Bei and Wu, Baoyuan and Huang, Xiaowei and Cheng, Guangliang},
  title     = {{SIDA}: Social Media Image Deepfake Detection, Localization and Explanation with Large Multimodal Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28831--28841},
}
Matrix3D: Large Photogrammetry Model All-in-One: Yuanxun Lu,

Jingyang Zhang,

Tian Fang,

Jean-Daniel Nahmias,

Yanghai Tsin,

Long Quan,

Xun Cao,

Yao Yao,

Shiwei Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Yuanxun and Zhang, Jingyang and Fang, Tian and Nahmias, Jean-Daniel and Tsin, Yanghai and Quan, Long and Cao, Xun and Yao, Yao and Li, Shiwei},
  title     = {{Matrix3D}: Large Photogrammetry Model All-in-One},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11250--11263},
}
Object-Centric Prompt-Driven Vision-Language-Action Model for Robotic Manipulation: Xiaoqi Li,

Jingyun Xu,

Mingxu Zhang,

Jiaming Liu,

Yan Shen,

Iaroslav Ponomarenko,

Jiahui Xu,

Liang Heng,

Siyuan Huang,

Shanghang Zhang,

Hao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xiaoqi and Xu, Jingyun and Zhang, Mingxu and Liu, Jiaming and Shen, Yan and Ponomarenko, Iaroslav and Xu, Jiahui and Heng, Liang and Huang, Siyuan and Zhang, Shanghang and Dong, Hao},
  title     = {Object-Centric Prompt-Driven Vision-Language-Action Model for Robotic Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27638--27648},
}
Proximal Algorithm Unrolling: Flexible and Efficient Reconstruction Networks for Single-Pixel Imaging: Ping Wang,

Lishun Wang,

Gang Qu,

Xiaodong Wang,

Yulun Zhang,

Xin Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Ping and Wang, Lishun and Qu, Gang and Wang, Xiaodong and Zhang, Yulun and Yuan, Xin},
  title     = {Proximal Algorithm Unrolling: Flexible and Efficient Reconstruction Networks for Single-Pixel Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {411--421},
}
3DEnhancer: Consistent Multi-View Diffusion for 3D Enhancement: Yihang Luo,

Shangchen Zhou,

Yushi Lan,

Xingang Pan,

Chen Change Loy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Yihang and Zhou, Shangchen and Lan, Yushi and Pan, Xingang and Loy, Chen Change},
  title     = {{3DEnhancer}: Consistent Multi-View Diffusion for {3D} Enhancement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16430--16440},
}
Investigating the Role of Weight Decay in Enhancing Nonconvex SGD: Tao Sun,

Yuhao Huang,

Li Shen,

Kele Xu,

Bao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Tao and Huang, Yuhao and Shen, Li and Xu, Kele and Wang, Bao},
  title     = {Investigating the Role of Weight Decay in Enhancing Nonconvex {SGD}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15287--15296},
}
MarkushGrapher: Joint Visual and Textual Recognition of Markush Structures: Lucas Morin,

Valery Weber,

Ahmed Nassar,

Gerhard Ingmar Meijer,

Luc Van Gool,

Yawei Li,

Peter Staar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Morin_2025_CVPR,
  author    = {Morin, Lucas and Weber, Valery and Nassar, Ahmed and Meijer, Gerhard Ingmar and Van Gool, Luc and Li, Yawei and Staar, Peter},
  title     = {{MarkushGrapher}: Joint Visual and Textual Recognition of {Markush} Structures},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14505--14515},
}
Depth Any Camera: Zero-Shot Metric Depth Estimation from Any Camera: Yuliang Guo,

Sparsh Garg,

S. Mahdi H. Miangoleh,

Xinyu Huang,

Liu Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Yuliang and Garg, Sparsh and Miangoleh, S. Mahdi H. and Huang, Xinyu and Ren, Liu},
  title     = {Depth Any Camera: Zero-Shot Metric Depth Estimation from Any Camera},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26996--27006},
}
Image Quality Assessment: From Human to Machine Preference: Chunyi Li,

Yuan Tian,

Xiaoyue Ling,

Zicheng Zhang,

Haodong Duan,

Haoning Wu,

Ziheng Jia,

Xiaohong Liu,

Xiongkuo Min,

Guo Lu,

Weisi Lin,

Guangtao Zhai; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Chunyi and Tian, Yuan and Ling, Xiaoyue and Zhang, Zicheng and Duan, Haodong and Wu, Haoning and Jia, Ziheng and Liu, Xiaohong and Min, Xiongkuo and Lu, Guo and Lin, Weisi and Zhai, Guangtao},
  title     = {Image Quality Assessment: From Human to Machine Preference},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7570--7581},
}
ShotAdapter: Text-to-Multi-Shot Video Generation with Diffusion Models: Ozgur Kara,

Krishna Kumar Singh,

Feng Liu,

Duygu Ceylan,

James M. Rehg,

Tobias Hinz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kara_2025_CVPR,
  author    = {Kara, Ozgur and Singh, Krishna Kumar and Liu, Feng and Ceylan, Duygu and Rehg, James M. and Hinz, Tobias},
  title     = {{ShotAdapter}: Text-to-Multi-Shot Video Generation with Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28405--28415},
}
Context-Aware Multimodal Pretraining: Karsten Roth,

Zeynep Akata,

Dima Damen,

Ivana Balazevic,

Olivier J. Henaff; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Roth_2025_CVPR,
  author    = {Roth, Karsten and Akata, Zeynep and Damen, Dima and Balazevic, Ivana and Henaff, Olivier J.},
  title     = {Context-Aware Multimodal Pretraining},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4267--4279},
}
Sound Bridge: Associating Egocentric and Exocentric Videos via Audio Cues: Sihong Huang,

Jiaxin Wu,

Xiaoyong Wei,

Yi Cai,

Dongmei Jiang,

Yaowei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Sihong and Wu, Jiaxin and Wei, Xiaoyong and Cai, Yi and Jiang, Dongmei and Wang, Yaowei},
  title     = {Sound Bridge: Associating Egocentric and Exocentric Videos via Audio Cues},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28942--28951},
}
Detecting Backdoor Attacks in Federated Learning via Direction Alignment Inspection: Jiahao Xu,

Zikai Zhang,

Rui Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Jiahao and Zhang, Zikai and Hu, Rui},
  title     = {Detecting Backdoor Attacks in Federated Learning via Direction Alignment Inspection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20654--20664},
}
OmniDocBench: Benchmarking Diverse PDF Document Parsing with Comprehensive Annotations: Linke Ouyang,

Yuan Qu,

Hongbin Zhou,

Jiawei Zhu,

Rui Zhang,

Qunshu Lin,

Bin Wang,

Zhiyuan Zhao,

Man Jiang,

Xiaomeng Zhao,

Jin Shi,

Fan Wu,

Pei Chu,

Minghao Liu,

Zhenxiang Li,

Chao Xu,

Bo Zhang,

Botian Shi,

Zhongying Tu,

Conghui He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ouyang_2025_CVPR,
  author    = {Ouyang, Linke and Qu, Yuan and Zhou, Hongbin and Zhu, Jiawei and Zhang, Rui and Lin, Qunshu and Wang, Bin and Zhao, Zhiyuan and Jiang, Man and Zhao, Xiaomeng and Shi, Jin and Wu, Fan and Chu, Pei and Liu, Minghao and Li, Zhenxiang and Xu, Chao and Zhang, Bo and Shi, Botian and Tu, Zhongying and He, Conghui},
  title     = {{OmniDocBench}: Benchmarking Diverse {PDF} Document Parsing with Comprehensive Annotations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24838--24848},
}
LayoutVLM: Differentiable Optimization of 3D Layout via Vision-Language Models: Fan-Yun Sun,

Weiyu Liu,

Siyi Gu,

Dylan Lim,

Goutam Bhat,

Federico Tombari,

Manling Li,

Nick Haber,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Fan-Yun and Liu, Weiyu and Gu, Siyi and Lim, Dylan and Bhat, Goutam and Tombari, Federico and Li, Manling and Haber, Nick and Wu, Jiajun},
  title     = {{LayoutVLM}: Differentiable Optimization of {3D} Layout via Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29469--29478},
}
Point Clouds Meets Physics: Dynamic Acoustic Field Fitting Network for Point Cloud Understanding: Changshuo Wang,

Shuting He,

Xiang Fang,

Jiawei Han,

Zhonghang Liu,

Xin Ning,

Weijun Li,

Prayag Tiwari; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Changshuo and He, Shuting and Fang, Xiang and Han, Jiawei and Liu, Zhonghang and Ning, Xin and Li, Weijun and Tiwari, Prayag}, title = {Point Clouds Meets Physics: Dynamic Acoustic Field Fitting Network for Point Cloud Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22182--22192} }
Faster Parameter-Efficient Tuning with Token Redundancy Reduction: Kwonyoung Kim,

Jungin Park,

Jin Kim,

Hyeongjun Kwon,

Kwanghoon Sohn; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR, author = {Kim, Kwonyoung and Park, Jungin and Kim, Jin and Kwon, Hyeongjun and Sohn, Kwanghoon}, title = {Faster Parameter-Efficient Tuning with Token Redundancy Reduction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30189--30198} }
BlockDance: Reuse Structurally Similar Spatio-Temporal Features to Accelerate Diffusion Transformers: Hui Zhang,

Tingwei Gao,

Jie Shao,

Zuxuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Hui and Gao, Tingwei and Shao, Jie and Wu, Zuxuan}, title = {BlockDance: Reuse Structurally Similar Spatio-Temporal Features to Accelerate Diffusion Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12891--12900} }
Panorama Generation From NFoV Image Done Right: Dian Zheng,

Cheng Zhang,

Xiao-Ming Wu,

Cao Li,

Chengfei Lv,

Jian-Fang Hu,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_CVPR, author = {Zheng, Dian and Zhang, Cheng and Wu, Xiao-Ming and Li, Cao and Lv, Chengfei and Hu, Jian-Fang and Zheng, Wei-Shi}, title = {Panorama Generation From NFoV Image Done Right}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21610--21619} }
Mamba-Adaptor: State Space Model Adaptor for Visual Recognition: Fei Xie,

Jiahao Nie,

Yujin Tang,

Wenkang Zhang,

Hongshen Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_CVPR, author = {Xie, Fei and Nie, Jiahao and Tang, Yujin and Zhang, Wenkang and Zhao, Hongshen}, title = {Mamba-Adaptor: State Space Model Adaptor for Visual Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20124--20134} }
Robust Message Embedding via Attention Flow-Based Steganography: Huayuan Ye,

Shenzhuo Zhang,

Shiqi Jiang,

Jing Liao,

Shuhang Gu,

Dejun Zheng,

Changbo Wang,

Chenhui Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR, author = {Ye, Huayuan and Zhang, Shenzhuo and Jiang, Shiqi and Liao, Jing and Gu, Shuhang and Zheng, Dejun and Wang, Changbo and Li, Chenhui}, title = {Robust Message Embedding via Attention Flow-Based Steganography}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12840--12849} }
Task-driven Image Fusion with Learnable Fusion Loss: Haowen Bai,

Jiangshe Zhang,

Zixiang Zhao,

Yichen Wu,

Lilun Deng,

Yukun Cui,

Tao Feng,

Shuang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2025_CVPR, author = {Bai, Haowen and Zhang, Jiangshe and Zhao, Zixiang and Wu, Yichen and Deng, Lilun and Cui, Yukun and Feng, Tao and Xu, Shuang}, title = {Task-driven Image Fusion with Learnable Fusion Loss}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7457--7468} }
Compositional Targeted Multi-Label Universal Perturbations: Hassan Mahmood,

Ehsan Elhamifar; [pdf] [supp]
[bibtex]
@InProceedings{Mahmood_2025_CVPR, author = {Mahmood, Hassan and Elhamifar, Ehsan}, title = {Compositional Targeted Multi-Label Universal Perturbations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20580--20591} }
PatchGuard: Adversarially Robust Anomaly Detection and Localization through Vision Transformers and Pseudo Anomalies: Mojtaba Nafez,

Amirhossein Koochakian,

Arad Maleki,

Jafar Habibi,

Mohammad Hossein Rohban; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nafez_2025_CVPR, author = {Nafez, Mojtaba and Koochakian, Amirhossein and Maleki, Arad and Habibi, Jafar and Rohban, Mohammad Hossein}, title = {PatchGuard: Adversarially Robust Anomaly Detection and Localization through Vision Transformers and Pseudo Anomalies}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {20383--20394} }
Sparse Point Cloud Patches Rendering via Splitting 2D Gaussians: Changfeng Ma,

Ran Bi,

Jie Guo,

Chongjun Wang,

Yanwen Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR, author = {Ma, Changfeng and Bi, Ran and Guo, Jie and Wang, Chongjun and Guo, Yanwen}, title = {Sparse Point Cloud Patches Rendering via Splitting 2D Gaussians}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27285--27294} }
Distilling Monocular Foundation Model for Fine-grained Depth Completion: Yingping Liang,

Yutao Hu,

Wenqi Shao,

Ying Fu; [pdf] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR, author = {Liang, Yingping and Hu, Yutao and Shao, Wenqi and Fu, Ying}, title = {Distilling Monocular Foundation Model for Fine-grained Depth Completion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22254--22265} }
LamRA: Large Multimodal Model as Your Advanced Retrieval Assistant: Yikun Liu,

Yajie Zhang,

Jiayin Cai,

Xiaolong Jiang,

Yao Hu,

Jiangchao Yao,

Yanfeng Wang,

Weidi Xie; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Yikun and Zhang, Yajie and Cai, Jiayin and Jiang, Xiaolong and Hu, Yao and Yao, Jiangchao and Wang, Yanfeng and Xie, Weidi}, title = {LamRA: Large Multimodal Model as Your Advanced Retrieval Assistant}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4015--4025} }
AniGrad: Anisotropic Gradient-Adaptive Sampling for 3D Reconstruction From Monocular Video: Noah Stier,

Alex Rich,

Pradeep Sen,

Tobias Höllerer; [pdf] [supp]
[bibtex]
@InProceedings{Stier_2025_CVPR, author = {Stier, Noah and Rich, Alex and Sen, Pradeep and H{\"o}llerer, Tobias}, title = {AniGrad: Anisotropic Gradient-Adaptive Sampling for 3D Reconstruction From Monocular Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21814--21823} }
Neural Video Compression with Context Modulation: Chuanbo Tang,

Zhuoyuan Li,

Yifan Bian,

Li Li,

Dong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR, author = {Tang, Chuanbo and Li, Zhuoyuan and Bian, Yifan and Li, Li and Liu, Dong}, title = {Neural Video Compression with Context Modulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12553--12563} }
Less Attention is More: Prompt Transformer for Generalized Category Discovery: Wei Zhang,

Baopeng Zhang,

Zhu Teng,

Wenxin Luo,

Junnan Zou,

Jianping Fan; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Wei and Zhang, Baopeng and Teng, Zhu and Luo, Wenxin and Zou, Junnan and Fan, Jianping}, title = {Less Attention is More: Prompt Transformer for Generalized Category Discovery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30322--30331} }
On-Device Self-Supervised Learning of Low-Latency Monocular Depth from Only Events: Jesse J. Hagenaars,

Yilun Wu,

Federico Paredes-Valles,

Stein Stroobants,

Guido C.H.E. de Croon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hagenaars_2025_CVPR, author = {Hagenaars, Jesse J. and Wu, Yilun and Paredes-Valles, Federico and Stroobants, Stein and de Croon, Guido C. H. E.}, title = {On-Device Self-Supervised Learning of Low-Latency Monocular Depth from Only Events}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17114--17123} }
CoMM: A Coherent Interleaved Image-Text Dataset for Multimodal Understanding and Generation: Wei Chen,

Lin Li,

Yongqi Yang,

Bin Wen,

Fan Yang,

Tingting Gao,

Yu Wu,

Long Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR, author = {Chen, Wei and Li, Lin and Yang, Yongqi and Wen, Bin and Yang, Fan and Gao, Tingting and Wu, Yu and Chen, Long}, title = {CoMM: A Coherent Interleaved Image-Text Dataset for Multimodal Understanding and Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8073--8082} }
MoGe: Unlocking Accurate Monocular Geometry Estimation for Open-Domain Images with Optimal Training Supervision: Ruicheng Wang,

Sicheng Xu,

Cassie Dai,

Jianfeng Xiang,

Yu Deng,

Xin Tong,

Jiaolong Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Ruicheng and Xu, Sicheng and Dai, Cassie and Xiang, Jianfeng and Deng, Yu and Tong, Xin and Yang, Jiaolong}, title = {MoGe: Unlocking Accurate Monocular Geometry Estimation for Open-Domain Images with Optimal Training Supervision}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5261--5271} }
Beyond Background Shift: Rethinking Instance Replay in Continual Semantic Segmentation: Hongmei Yin,

Tingliang Feng,

Fan Lyu,

Fanhua Shang,

Hongying Liu,

Wei Feng,

Liang Wan; [pdf] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR, author = {Yin, Hongmei and Feng, Tingliang and Lyu, Fan and Shang, Fanhua and Liu, Hongying and Feng, Wei and Wan, Liang}, title = {Beyond Background Shift: Rethinking Instance Replay in Continual Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9839--9848} }
ScaleLSD: Scalable Deep Line Segment Detection Streamlined: Zeran Ke,

Bin Tan,

Xianwei Zheng,

Yujun Shen,

Tianfu Wu,

Nan Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2025_CVPR, author = {Ke, Zeran and Tan, Bin and Zheng, Xianwei and Shen, Yujun and Wu, Tianfu and Xue, Nan}, title = {ScaleLSD: Scalable Deep Line Segment Detection Streamlined}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6327--6336} }
AToM: Aligning Text-to-Motion Model at Event-Level with GPT-4Vision Reward: Haonan Han,

Xiangzuo Wu,

Huan Liao,

Zunnan Xu,

Zhongyuan Hu,

Ronghui Li,

Yachao Zhang,

Xiu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR, author = {Han, Haonan and Wu, Xiangzuo and Liao, Huan and Xu, Zunnan and Hu, Zhongyuan and Li, Ronghui and Zhang, Yachao and Li, Xiu}, title = {AToM: Aligning Text-to-Motion Model at Event-Level with GPT-4Vision Reward}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22746--22755} }
Revisiting MAE Pre-training for 3D Medical Image Segmentation: Tassilo Wald,

Constantin Ulrich,

Stanislav Lukyanenko,

Andrei Goncharov,

Alberto Paderno,

Maximilian Miller,

Leander Maerkisch,

Paul Jaeger,

Klaus Maier-Hein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wald_2025_CVPR, author = {Wald, Tassilo and Ulrich, Constantin and Lukyanenko, Stanislav and Goncharov, Andrei and Paderno, Alberto and Miller, Maximilian and Maerkisch, Leander and Jaeger, Paul and Maier-Hein, Klaus}, title = {Revisiting MAE Pre-training for 3D Medical Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5186--5196} }
Learning with Noisy Triplet Correspondence for Composed Image Retrieval: Shuxian Li,

Changhao He,

Xiting Liu,

Joey Tianyi Zhou,

Xi Peng,

Peng Hu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Shuxian and He, Changhao and Liu, Xiting and Zhou, Joey Tianyi and Peng, Xi and Hu, Peng}, title = {Learning with Noisy Triplet Correspondence for Composed Image Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19628--19637} }
DoF-Gaussian: Controllable Depth-of-Field for 3D Gaussian Splatting: Liao Shen,

Tianqi Liu,

Huiqiang Sun,

Jiaqi Li,

Zhiguo Cao,

Wei Li,

Chen Change Loy; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_CVPR, author = {Shen, Liao and Liu, Tianqi and Sun, Huiqiang and Li, Jiaqi and Cao, Zhiguo and Li, Wei and Loy, Chen Change}, title = {DoF-Gaussian: Controllable Depth-of-Field for 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26462--26471} }
Parallelized Autoregressive Visual Generation: Yuqing Wang,

Shuhuai Ren,

Zhijie Lin,

Yujin Han,

Haoyuan Guo,

Zhenheng Yang,

Difan Zou,

Jiashi Feng,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Yuqing and Ren, Shuhuai and Lin, Zhijie and Han, Yujin and Guo, Haoyuan and Yang, Zhenheng and Zou, Difan and Feng, Jiashi and Liu, Xihui}, title = {Parallelized Autoregressive Visual Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12955--12965} }
CGMatch: A Different Perspective of Semi-supervised Learning: Bo Cheng,

Jueqing Lu,

Yuan Tian,

Haifeng Zhao,

Yi Chang,

Lan Du; [pdf] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR, author = {Cheng, Bo and Lu, Jueqing and Tian, Yuan and Zhao, Haifeng and Chang, Yi and Du, Lan}, title = {CGMatch: A Different Perspective of Semi-supervised Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15381--15391} }
ChatHuman: Chatting about 3D Humans with Tools: Jing Lin,

Yao Feng,

Weiyang Liu,

Michael J. Black; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR, author = {Lin, Jing and Feng, Yao and Liu, Weiyang and Black, Michael J.}, title = {ChatHuman: Chatting about 3D Humans with Tools}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8150--8161} }
FIction: 4D Future Interaction Prediction from Video: Kumar Ashutosh,

Georgios Pavlakos,

Kristen Grauman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ashutosh_2025_CVPR, author = {Ashutosh, Kumar and Pavlakos, Georgios and Grauman, Kristen}, title = {FIction: 4D Future Interaction Prediction from Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17613--17625} }
D^2iT: Dynamic Diffusion Transformer for Accurate Image Generation: Weinan Jia,

Mengqi Huang,

Nan Chen,

Lei Zhang,

Zhendong Mao; [pdf] [supp]
[bibtex]
@InProceedings{Jia_2025_CVPR, author = {Jia, Weinan and Huang, Mengqi and Chen, Nan and Zhang, Lei and Mao, Zhendong}, title = {D{\textasciicircum}2iT: Dynamic Diffusion Transformer for Accurate Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12860--12870} }
Scalable Autoregressive Monocular Depth Estimation: Jinhong Wang,

Jian Liu,

Dongqi Tang,

Weiqiang Wang,

Wentong Li,

Danny Chen,

Jintai Chen,

Jian Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Jinhong and Liu, Jian and Tang, Dongqi and Wang, Weiqiang and Li, Wentong and Chen, Danny and Chen, Jintai and Wu, Jian}, title = {Scalable Autoregressive Monocular Depth Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6262--6272} }
Reconciling Stochastic and Deterministic Strategies for Zero-shot Image Restoration using Diffusion Model in Dual: Chong Wang,

Lanqing Guo,

Zixuan Fu,

Siyuan Yang,

Hao Cheng,

Alex C. Kot,

Bihan Wen; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Chong and Guo, Lanqing and Fu, Zixuan and Yang, Siyuan and Cheng, Hao and Kot, Alex C. and Wen, Bihan}, title = {Reconciling Stochastic and Deterministic Strategies for Zero-shot Image Restoration using Diffusion Model in Dual}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23207--23216} }
Hierarchical Flow Diffusion for Efficient Frame Interpolation: Yang Hai,

Guo Wang,

Tan Su,

Wenjie Jiang,

Yinlin Hu; [pdf] [arXiv]
[bibtex]
@InProceedings{Hai_2025_CVPR, author = {Hai, Yang and Wang, Guo and Su, Tan and Jiang, Wenjie and Hu, Yinlin}, title = {Hierarchical Flow Diffusion for Efficient Frame Interpolation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22943--22952} }
Recurrence-Enhanced Vision-and-Language Transformers for Robust Multimodal Document Retrieval: Davide Caffagni,

Sara Sarto,

Marcella Cornia,

Lorenzo Baraldi,

Rita Cucchiara; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Caffagni_2025_CVPR, author = {Caffagni, Davide and Sarto, Sara and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita}, title = {Recurrence-Enhanced Vision-and-Language Transformers for Robust Multimodal Document Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9286--9295} }
Camouflage Anything: Learning to Hide using Controlled Out-painting and Representation Engineering: Biplab Das,

Viswanath Gopalakrishnan; [pdf] [supp]
[bibtex]
@InProceedings{Das_2025_CVPR, author = {Das, Biplab and Gopalakrishnan, Viswanath}, title = {Camouflage Anything: Learning to Hide using Controlled Out-painting and Representation Engineering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {3603--3613} }
Test-Time Fine-Tuning of Image Compression Models for Multi-Task Adaptability: Unki Park,

Seongmoon Jeong,

Youngchan Jang,

Gyeong-Moon Park,

Jong Hwan Ko; [pdf] [supp]
[bibtex]
@InProceedings{Park_2025_CVPR, author = {Park, Unki and Jeong, Seongmoon and Jang, Youngchan and Park, Gyeong-Moon and Ko, Jong Hwan}, title = {Test-Time Fine-Tuning of Image Compression Models for Multi-Task Adaptability}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4430--4440} }
BASKET: A Large-Scale Video Dataset for Fine-Grained Skill Estimation: Yulu Pan,

Ce Zhang,

Gedas Bertasius; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2025_CVPR, author = {Pan, Yulu and Zhang, Ce and Bertasius, Gedas}, title = {BASKET: A Large-Scale Video Dataset for Fine-Grained Skill Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28952--28962} }
AniDoc: Animation Creation Made Easier: Yihao Meng,

Hao Ouyang,

Hanlin Wang,

Qiuyu Wang,

Wen Wang,

Ka Leong Cheng,

Zhiheng Liu,

Yujun Shen,

Huamin Qu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meng_2025_CVPR, author = {Meng, Yihao and Ouyang, Hao and Wang, Hanlin and Wang, Qiuyu and Wang, Wen and Cheng, Ka Leong and Liu, Zhiheng and Shen, Yujun and Qu, Huamin}, title = {AniDoc: Animation Creation Made Easier}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18187--18197} }
DynPose: Largely Improving the Efficiency of Human Pose Estimation by a Simple Dynamic Framework: Yalong Xu,

Lin Zhao,

Chen Gong,

Guangyu Li,

Di Wang,

Nannan Wang; [pdf]
[bibtex]
@InProceedings{Xu_2025_CVPR, author = {Xu, Yalong and Zhao, Lin and Gong, Chen and Li, Guangyu and Wang, Di and Wang, Nannan}, title = {DynPose: Largely Improving the Efficiency of Human Pose Estimation by a Simple Dynamic Framework}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1160--1169} }
Arbitrary-steps Image Super-resolution via Diffusion Inversion: Zongsheng Yue,

Kang Liao,

Chen Change Loy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yue_2025_CVPR, author = {Yue, Zongsheng and Liao, Kang and Loy, Chen Change}, title = {Arbitrary-steps Image Super-resolution via Diffusion Inversion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23153--23163} }
LiSu: A Dataset and Method for LiDAR Surface Normal Estimation: Dušan Malić,

Christian Fruhwirth-Reisinger,

Samuel Schulter,

Horst Possegger; [pdf] [supp]
[bibtex]
@InProceedings{Malic_2025_CVPR, author = {Mali{\'c}, Du{\v{s}}an and Fruhwirth-Reisinger, Christian and Schulter, Samuel and Possegger, Horst}, title = {LiSu: A Dataset and Method for LiDAR Surface Normal Estimation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17039--17049} }
Dynamic Neural Surfaces for Elastic 4D Shape Representation and Analysis: Awais Nizamani,

Hamid Laga,

Guanjin Wang,

Farid Boussaid,

Mohammed Bennamoun,

Anuj Srivastava; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nizamani_2025_CVPR, author = {Nizamani, Awais and Laga, Hamid and Wang, Guanjin and Boussaid, Farid and Bennamoun, Mohammed and Srivastava, Anuj}, title = {Dynamic Neural Surfaces for Elastic 4D Shape Representation and Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21783--21792} }
Spk2SRImgNet: Super-Resolve Dynamic Scene from Spike Stream via Motion Aligned Collaborative Filtering: Yuanlin Wang,

Yiyang Zhang,

Ruiqin Xiong,

Jing Zhao,

Jian Zhang,

Xiaopeng Fan,

Tiejun Huang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Yuanlin and Zhang, Yiyang and Xiong, Ruiqin and Zhao, Jing and Zhang, Jian and Fan, Xiaopeng and Huang, Tiejun}, title = {Spk2SRImgNet: Super-Resolve Dynamic Scene from Spike Stream via Motion Aligned Collaborative Filtering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11416--11426} }
ComfyBench: Benchmarking LLM-based Agents in ComfyUI for Autonomously Designing Collaborative AI Systems: Xiangyuan Xue,

Zeyu Lu,

Di Huang,

Zidong Wang,

Wanli Ouyang,

Lei Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2025_CVPR, author = {Xue, Xiangyuan and Lu, Zeyu and Huang, Di and Wang, Zidong and Ouyang, Wanli and Bai, Lei}, title = {ComfyBench: Benchmarking LLM-based Agents in ComfyUI for Autonomously Designing Collaborative AI Systems}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24614--24624} }
VideoGLaMM : A Large Multimodal Model for Pixel-Level Visual Grounding in Videos: Shehan Munasinghe,

Hanan Gani,

Wenqi Zhu,

Jiale Cao,

Eric Xing,

Fahad Shahbaz Khan,

Salman Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Munasinghe_2025_CVPR, author = {Munasinghe, Shehan and Gani, Hanan and Zhu, Wenqi and Cao, Jiale and Xing, Eric and Khan, Fahad Shahbaz and Khan, Salman}, title = {VideoGLaMM : A Large Multimodal Model for Pixel-Level Visual Grounding in Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19036--19046} }
Incomplete Multi-View Multi-label Learning via Disentangled Representation and Label Semantic Embedding: Xu Yan,

Jun Yin,

Jie Wen; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2025_CVPR, author = {Yan, Xu and Yin, Jun and Wen, Jie}, title = {Incomplete Multi-View Multi-label Learning via Disentangled Representation and Label Semantic Embedding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30722--30731} }
AutoURDF: Unsupervised Robot Modeling from Point Cloud Frames Using Cluster Registration: Jiong Lin,

Lechen Zhang,

Kwansoo Lee,

Jialong Ning,

Judah Goldfeder,

Hod Lipson; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR, author = {Lin, Jiong and Zhang, Lechen and Lee, Kwansoo and Ning, Jialong and Goldfeder, Judah and Lipson, Hod}, title = {AutoURDF: Unsupervised Robot Modeling from Point Cloud Frames Using Cluster Registration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {27628--27637} }
ZeroGrasp: Zero-Shot Shape Reconstruction Enabled Robotic Grasping: Shun Iwase,

Muhammad Zubair Irshad,

Katherine Liu,

Vitor Guizilini,

Robert Lee,

Takuya Ikeda,

Ayako Amma,

Koichi Nishiwaki,

Kris Kitani,

Rares Ambrus,

Sergey Zakharov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Iwase_2025_CVPR, author = {Iwase, Shun and Irshad, Muhammad Zubair and Liu, Katherine and Guizilini, Vitor and Lee, Robert and Ikeda, Takuya and Amma, Ayako and Nishiwaki, Koichi and Kitani, Kris and Ambrus, Rares and Zakharov, Sergey}, title = {ZeroGrasp: Zero-Shot Shape Reconstruction Enabled Robotic Grasping}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17405--17415} }
Golden Cudgel Network for Real-Time Semantic Segmentation: Guoyu Yang,

Yuan Wang,

Daming Shi,

Yanzhong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR, author = {Yang, Guoyu and Wang, Yuan and Shi, Daming and Wang, Yanzhong}, title = {Golden Cudgel Network for Real-Time Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25367--25376} }
PDFactor: Learning Tri-Perspective View Policy Diffusion Field for Multi-Task Robotic Manipulation: Jingyi Tian,

Le Wang,

Sanping Zhou,

Sen Wang,

Jiayi Li,

Haowen Sun,

Wei Tang; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2025_CVPR, author = {Tian, Jingyi and Wang, Le and Zhou, Sanping and Wang, Sen and Li, Jiayi and Sun, Haowen and Tang, Wei}, title = {PDFactor: Learning Tri-Perspective View Policy Diffusion Field for Multi-Task Robotic Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15757--15767} }
VideoTree: Adaptive Tree-based Video Representation for LLM Reasoning on Long Videos: Ziyang Wang,

Shoubin Yu,

Elias Stengel-Eskin,

Jaehong Yoon,

Feng Cheng,

Gedas Bertasius,

Mohit Bansal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Ziyang and Yu, Shoubin and Stengel-Eskin, Elias and Yoon, Jaehong and Cheng, Feng and Bertasius, Gedas and Bansal, Mohit}, title = {VideoTree: Adaptive Tree-based Video Representation for LLM Reasoning on Long Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {3272--3283} }
Multi-modal Contrastive Learning with Negative Sampling Calibration for Phenotypic Drug Discovery: Jiahua Rao,

Hanjing Lin,

Leyu Chen,

Jiancong Xie,

Shuangjia Zheng,

Yuedong Yang; [pdf] [supp]
[bibtex]
@InProceedings{Rao_2025_CVPR, author = {Rao, Jiahua and Lin, Hanjing and Chen, Leyu and Xie, Jiancong and Zheng, Shuangjia and Yang, Yuedong}, title = {Multi-modal Contrastive Learning with Negative Sampling Calibration for Phenotypic Drug Discovery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30752--30762} }
R-TPT: Improving Adversarial Robustness of Vision-Language Models through Test-Time Prompt Tuning: Lijun Sheng,

Jian Liang,

Zilei Wang,

Ran He; [pdf] [supp]
[bibtex]
@InProceedings{Sheng_2025_CVPR, author = {Sheng, Lijun and Liang, Jian and Wang, Zilei and He, Ran}, title = {R-TPT: Improving Adversarial Robustness of Vision-Language Models through Test-Time Prompt Tuning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29958--29967} }
Distinguish Then Exploit: Source-free Open Set Domain Adaptation via Weight Barcode Estimation and Sparse Label Assignment: Weiming Liu,

Jun Dan,

Fan Wang,

Xinting Liao,

Junhao Dong,

Hua Yu,

Shunjie Dong,

Lianyong Qi; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Weiming and Dan, Jun and Wang, Fan and Liao, Xinting and Dong, Junhao and Yu, Hua and Dong, Shunjie and Qi, Lianyong}, title = {Distinguish Then Exploit: Source-free Open Set Domain Adaptation via Weight Barcode Estimation and Sparse Label Assignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4927--4938} }
Multi-Sensor Object Anomaly Detection: Unifying Appearance, Geometry, and Internal Properties: Wenqiao Li,

Bozhong Zheng,

Xiaohao Xu,

Jinye Gan,

Fading Lu,

Xiang Li,

Na Ni,

Zheng Tian,

Xiaonan Huang,

Shenghua Gao,

Yingna Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Wenqiao and Zheng, Bozhong and Xu, Xiaohao and Gan, Jinye and Lu, Fading and Li, Xiang and Ni, Na and Tian, Zheng and Huang, Xiaonan and Gao, Shenghua and Wu, Yingna}, title = {Multi-Sensor Object Anomaly Detection: Unifying Appearance, Geometry, and Internal Properties}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9984--9993} }
SplatFlow: Multi-View Rectified Flow Model for 3D Gaussian Splatting Synthesis: Hyojun Go,

Byeongjun Park,

Jiho Jang,

Jin-Young Kim,

Soonwoo Kwon,

Changick Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Go_2025_CVPR, author = {Go, Hyojun and Park, Byeongjun and Jang, Jiho and Kim, Jin-Young and Kwon, Soonwoo and Kim, Changick}, title = {SplatFlow: Multi-View Rectified Flow Model for 3D Gaussian Splatting Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21524--21536} }
Fancy123: One Image to High-Quality 3D Mesh Generation via Plug-and-Play Deformation: Qiao Yu,

Xianzhi Li,

Yuan Tang,

Xu Han,

Long Hu,

Yixue Hao,

Min Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR, author = {Yu, Qiao and Li, Xianzhi and Tang, Yuan and Han, Xu and Hu, Long and Hao, Yixue and Chen, Min}, title = {Fancy123: One Image to High-Quality 3D Mesh Generation via Plug-and-Play Deformation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {595--604} }
Dense Dispersed Structured Light for Hyperspectral 3D Imaging of Dynamic Scenes: Suhyun Shin,

Seungwoo Yoon,

Ryota Maeda,

Seung-Hwan Baek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shin_2025_CVPR,
  author    = {Shin, Suhyun and Yoon, Seungwoo and Maeda, Ryota and Baek, Seung-Hwan},
  title     = {Dense Dispersed Structured Light for Hyperspectral {3D} Imaging of Dynamic Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16589--16598}
}
PlanarSplatting: Accurate Planar Surface Reconstruction in 3 Minutes: Bin Tan,

Rui Yu,

Yujun Shen,

Nan Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2025_CVPR,
  author    = {Tan, Bin and Yu, Rui and Shen, Yujun and Xue, Nan},
  title     = {{PlanarSplatting}: Accurate Planar Surface Reconstruction in 3 Minutes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1190--1199}
}
Omni-ID: Holistic Identity Representation Designed for Generative Tasks: Guocheng Qian,

Kuan-Chieh Wang,

Or Patashnik,

Negin Heravi,

Daniil Ostashev,

Sergey Tulyakov,

Daniel Cohen-Or,

Kfir Aberman; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2025_CVPR,
  author    = {Qian, Guocheng and Wang, Kuan-Chieh and Patashnik, Or and Heravi, Negin and Ostashev, Daniil and Tulyakov, Sergey and Cohen-Or, Daniel and Aberman, Kfir},
  title     = {{Omni-ID}: Holistic Identity Representation Designed for Generative Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8786--8795}
}
MM-OR: A Large Multimodal Operating Room Dataset for Semantic Understanding of High-Intensity Surgical Environments: Ege Özsoy,

Chantal Pellegrini,

Tobias Czempiel,

Felix Tristram,

Kun Yuan,

David Bani-Harouni,

Ulrich Eck,

Benjamin Busam,

Matthias Keicher,

Nassir Navab; [pdf] [supp]
[bibtex]
@InProceedings{Ozsoy_2025_CVPR,
  author    = {{\"O}zsoy, Ege and Pellegrini, Chantal and Czempiel, Tobias and Tristram, Felix and Yuan, Kun and Bani-Harouni, David and Eck, Ulrich and Busam, Benjamin and Keicher, Matthias and Navab, Nassir},
  title     = {{MM-OR}: A Large Multimodal Operating Room Dataset for Semantic Understanding of High-Intensity Surgical Environments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19378--19389}
}
MIRE: Matched Implicit Neural Representations: Dhananjaya Jayasundara,

Heng Zhao,

Demetrio Labate,

Vishal M. Patel; [pdf] [supp]
[bibtex]
@InProceedings{Jayasundara_2025_CVPR,
  author    = {Jayasundara, Dhananjaya and Zhao, Heng and Labate, Demetrio and Patel, Vishal M.},
  title     = {{MIRE}: Matched Implicit Neural Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8279--8288}
}
AeSPa : Attention-guided Self-supervised Parallel Imaging for MRI Reconstruction: Jinho Joo,

Hyeseong Kim,

Hyeyeon Won,

Deukhee Lee,

Taejoon Eo,

Dosik Hwang; [pdf] [supp]
[bibtex]
@InProceedings{Joo_2025_CVPR,
  author    = {Joo, Jinho and Kim, Hyeseong and Won, Hyeyeon and Lee, Deukhee and Eo, Taejoon and Hwang, Dosik},
  title     = {{AeSPa} : Attention-guided Self-supervised Parallel Imaging for {MRI} Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5217--5226}
}
Boltzmann Attention Sampling for Image Analysis with Small Objects: Theodore Zhao,

Sid Kiblawi,

Naoto Usuyama,

Ho Hin Lee,

Sam Preston,

Hoifung Poon,

Mu Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Theodore and Kiblawi, Sid and Usuyama, Naoto and Lee, Ho Hin and Preston, Sam and Poon, Hoifung and Wei, Mu},
  title     = {Boltzmann Attention Sampling for Image Analysis with Small Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25950--25959}
}
Dora: Sampling and Benchmarking for 3D Shape Variational Auto-Encoders: Rui Chen,

Jianfeng Zhang,

Yixun Liang,

Guan Luo,

Weiyu Li,

Jiarui Liu,

Xiu Li,

Xiaoxiao Long,

Jiashi Feng,

Ping Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Rui and Zhang, Jianfeng and Liang, Yixun and Luo, Guan and Li, Weiyu and Liu, Jiarui and Li, Xiu and Long, Xiaoxiao and Feng, Jiashi and Tan, Ping},
  title     = {Dora: Sampling and Benchmarking for {3D} Shape Variational Auto-Encoders},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16251--16261}
}
Generalized Recorrupted-to-Recorrupted: Self-Supervised Learning Beyond Gaussian Noise: Brayan Monroy,

Jorge Bacca,

Julián Tachella; [pdf] [supp]
[bibtex]
@InProceedings{Monroy_2025_CVPR,
  author    = {Monroy, Brayan and Bacca, Jorge and Tachella, Juli{\'a}n},
  title     = {Generalized Recorrupted-to-Recorrupted: Self-Supervised Learning Beyond {Gaussian} Noise},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28155--28164}
}
Once-Tuning-Multiple-Variants: Tuning Once and Expanded as Multiple Vision-Language Model Variants: Chong Yu,

Tao Chen,

Zhongxue Gan; [pdf] [supp]
[bibtex]
% NOTE(review): key Yu_2025_CVPR is also used by the Fancy123 entry in this file; BibTeX requires unique keys, so disambiguate before citing.
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Chong and Chen, Tao and Gan, Zhongxue},
  title     = {Once-Tuning-Multiple-Variants: Tuning Once and Expanded as Multiple Vision-Language Model Variants},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14712--14722}
}
Dynamic Motion Blending for Versatile Motion Editing: Nan Jiang,

Hongjie Li,

Ziye Yuan,

Zimo He,

Yixin Chen,

Tengyu Liu,

Yixin Zhu,

Siyuan Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Nan and Li, Hongjie and Yuan, Ziye and He, Zimo and Chen, Yixin and Liu, Tengyu and Zhu, Yixin and Huang, Siyuan},
  title     = {Dynamic Motion Blending for Versatile Motion Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22735--22745}
}
StdGEN: Semantic-Decomposed 3D Character Generation from Single Images: Yuze He,

Yanning Zhou,

Wang Zhao,

Zhongkai Wu,

Kaiwen Xiao,

Wei Yang,

Yong-Jin Liu,

Xiao Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Yuze and Zhou, Yanning and Zhao, Wang and Wu, Zhongkai and Xiao, Kaiwen and Yang, Wei and Liu, Yong-Jin and Han, Xiao},
  title     = {{StdGEN}: Semantic-Decomposed {3D} Character Generation from Single Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26345--26355}
}
Reconstructing Animals and the Wild: Peter Kulits,

Michael J. Black,

Silvia Zuffi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kulits_2025_CVPR,
  author    = {Kulits, Peter and Black, Michael J. and Zuffi, Silvia},
  title     = {Reconstructing Animals and the Wild},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16565--16577}
}
RobSense: A Robust Multi-modal Foundation Model for Remote Sensing with Static, Temporal, and Incomplete Data Adaptability: Minh Kha Do,

Kang Han,

Phu Lai,

Khoa T. Phan,

Wei Xiang; [pdf]
[bibtex]
@InProceedings{Do_2025_CVPR,
  author    = {Do, Minh Kha and Han, Kang and Lai, Phu and Phan, Khoa T. and Xiang, Wei},
  title     = {{RobSense}: A Robust Multi-modal Foundation Model for Remote Sensing with Static, Temporal, and Incomplete Data Adaptability},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7427--7436}
}
Spatiotemporal Decoupling for Efficient Vision-Based Occupancy Forecasting: Jingyi Xu,

Xieyuanli Chen,

Junyi Ma,

Jiawei Huang,

Jintao Xu,

Yue Wang,

Ling Pei; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Xu_2025_CVPR is reused by several other entries in this file; BibTeX requires unique keys, so disambiguate before citing.
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Jingyi and Chen, Xieyuanli and Ma, Junyi and Huang, Jiawei and Xu, Jintao and Wang, Yue and Pei, Ling},
  title     = {Spatiotemporal Decoupling for Efficient Vision-Based Occupancy Forecasting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22338--22347}
}
DiffusionDrive: Truncated Diffusion Model for End-to-End Autonomous Driving: Bencheng Liao,

Shaoyu Chen,

Haoran Yin,

Bo Jiang,

Cheng Wang,

Sixu Yan,

Xinbang Zhang,

Xiangyu Li,

Ying Zhang,

Qian Zhang,

Xinggang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_CVPR,
  author    = {Liao, Bencheng and Chen, Shaoyu and Yin, Haoran and Jiang, Bo and Wang, Cheng and Yan, Sixu and Zhang, Xinbang and Li, Xiangyu and Zhang, Ying and Zhang, Qian and Wang, Xinggang},
  title     = {{DiffusionDrive}: Truncated Diffusion Model for End-to-End Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12037--12047}
}
MAC-Ego3D: Multi-Agent Gaussian Consensus for Real-Time Collaborative Ego-Motion and Photorealistic 3D Reconstruction: Xiaohao Xu,

Feng Xue,

Shibo Zhao,

Yike Pan,

Sebastian Scherer,

Xiaonan Huang; [pdf] [supp]
[bibtex]
% NOTE(review): key Xu_2025_CVPR is reused by several other entries in this file; BibTeX requires unique keys, so disambiguate before citing.
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Xiaohao and Xue, Feng and Zhao, Shibo and Pan, Yike and Scherer, Sebastian and Huang, Xiaonan},
  title     = {{MAC-Ego3D}: Multi-Agent {Gaussian} Consensus for Real-Time Collaborative Ego-Motion and Photorealistic {3D} Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {854--863}
}
FFR: Frequency Feature Rectification for Weakly Supervised Semantic Segmentation: Ziqian Yang,

Xinqiao Zhao,

Xiaolei Wang,

Quan Zhang,

Jimin Xiao; [pdf] [supp]
[bibtex]
% NOTE(review): key Yang_2025_CVPR is also used by the GenVDM entry in this file; BibTeX requires unique keys, so disambiguate before citing.
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Ziqian and Zhao, Xinqiao and Wang, Xiaolei and Zhang, Quan and Xiao, Jimin},
  title     = {{FFR}: Frequency Feature Rectification for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30261--30270}
}
DVHGNN: Multi-Scale Dilated Vision HGNN for Efficient Vision Recognition: Caoshuo Li,

Tanzhe Li,

Xiaobin Hu,

Donghao Luo,

Taisong Jin; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Li_2025_CVPR is reused by several other entries in this file; BibTeX requires unique keys, so disambiguate before citing.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Caoshuo and Li, Tanzhe and Hu, Xiaobin and Luo, Donghao and Jin, Taisong},
  title     = {{DVHGNN}: Multi-Scale Dilated Vision {HGNN} for Efficient Vision Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20158--20168}
}
Video-XL: Extra-Long Vision Language Model for Hour-Scale Video Understanding: Yan Shu,

Zheng Liu,

Peitian Zhang,

Minghao Qin,

Junjie Zhou,

Zhengyang Liang,

Tiejun Huang,

Bo Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Shu_2025_CVPR,
  author    = {Shu, Yan and Liu, Zheng and Zhang, Peitian and Qin, Minghao and Zhou, Junjie and Liang, Zhengyang and Huang, Tiejun and Zhao, Bo},
  title     = {{Video-XL}: Extra-Long Vision Language Model for Hour-Scale Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26160--26169}
}
Reconstructing In-the-Wild Open-Vocabulary Human-Object Interactions: Boran Wen,

Dingbang Huang,

Zichen Zhang,

Jiahong Zhou,

Jianbin Deng,

Jingyu Gong,

Yulong Chen,

Lizhuang Ma,

Yong-Lu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2025_CVPR,
  author    = {Wen, Boran and Huang, Dingbang and Zhang, Zichen and Zhou, Jiahong and Deng, Jianbin and Gong, Jingyu and Chen, Yulong and Ma, Lizhuang and Li, Yong-Lu},
  title     = {Reconstructing In-the-Wild Open-Vocabulary Human-Object Interactions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17426--17436}
}
GROVE: A Generalized Reward for Learning Open-Vocabulary Physical Skill: Jieming Cui,

Tengyu Liu,

Ziyu Meng,

Jiale Yu,

Ran Song,

Wei Zhang,

Yixin Zhu,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2025_CVPR,
  author    = {Cui, Jieming and Liu, Tengyu and Meng, Ziyu and Yu, Jiale and Song, Ran and Zhang, Wei and Zhu, Yixin and Huang, Siyuan},
  title     = {{GROVE}: A Generalized Reward for Learning Open-Vocabulary Physical Skill},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15781--15790}
}
Sonata: Self-Supervised Learning of Reliable Point Representations: Xiaoyang Wu,

Daniel DeTone,

Duncan Frost,

Tianwei Shen,

Chris Xie,

Nan Yang,

Jakob Engel,

Richard Newcombe,

Hengshuang Zhao,

Julian Straub; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wu_2025_CVPR is also used by the MODA entry in this file; BibTeX requires unique keys, so disambiguate before citing.
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Xiaoyang and DeTone, Daniel and Frost, Duncan and Shen, Tianwei and Xie, Chris and Yang, Nan and Engel, Jakob and Newcombe, Richard and Zhao, Hengshuang and Straub, Julian},
  title     = {Sonata: Self-Supervised Learning of Reliable Point Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22193--22204}
}
DriveGEN: Generalized and Robust 3D Detection in Driving via Controllable Text-to-Image Diffusion Generation: Hongbin Lin,

Zilu Guo,

Yifan Zhang,

Shuaicheng Niu,

Yafeng Li,

Ruimao Zhang,

Shuguang Cui,

Zhen Li; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Lin_2025_CVPR is also used by the UniAP entry in this file; BibTeX requires unique keys, so disambiguate before citing.
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Hongbin and Guo, Zilu and Zhang, Yifan and Niu, Shuaicheng and Li, Yafeng and Zhang, Ruimao and Cui, Shuguang and Li, Zhen},
  title     = {{DriveGEN}: Generalized and Robust {3D} Detection in Driving via Controllable Text-to-Image Diffusion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27497--27507}
}
GauSTAR: Gaussian Surface Tracking and Reconstruction: Chengwei Zheng,

Lixin Xue,

Juan Zarate,

Jie Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Chengwei and Xue, Lixin and Zarate, Juan and Song, Jie},
  title     = {{GauSTAR}: {Gaussian} Surface Tracking and Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16543--16553}
}
Training-free Dense-Aligned Diffusion Guidance for Modular Conditional Image Synthesis: Zixuan Wang,

Duo Peng,

Feng Chen,

Yuwei Yang,

Yinjie Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zixuan and Peng, Duo and Chen, Feng and Yang, Yuwei and Lei, Yinjie},
  title     = {Training-free Dense-Aligned Diffusion Guidance for Modular Conditional Image Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13135--13145}
}
DRiVE: Diffusion-based Rigging Empowers Generation of Versatile and Expressive Characters: Mingze Sun,

Junhao Chen,

Junting Dong,

Yurun Chen,

Xinyu Jiang,

Shiwei Mao,

Puhua Jiang,

Jingbo Wang,

Bo Dai,

Ruqi Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Mingze and Chen, Junhao and Dong, Junting and Chen, Yurun and Jiang, Xinyu and Mao, Shiwei and Jiang, Puhua and Wang, Jingbo and Dai, Bo and Huang, Ruqi},
  title     = {{DRiVE}: Diffusion-based Rigging Empowers Generation of Versatile and Expressive Characters},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21170--21180}
}
Online Video Understanding: OVBench and VideoChat-Online: Zhenpeng Huang,

Xinhao Li,

Jiaqi Li,

Jing Wang,

Xiangyu Zeng,

Cheng Liang,

Tao Wu,

Xi Chen,

Liang Li,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zhenpeng and Li, Xinhao and Li, Jiaqi and Wang, Jing and Zeng, Xiangyu and Liang, Cheng and Wu, Tao and Chen, Xi and Li, Liang and Wang, Limin},
  title     = {Online Video Understanding: {OVBench} and {VideoChat-Online}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3328--3338}
}
TADFormer: Task-Adaptive Dynamic TransFormer for Efficient Multi-Task Learning: Seungmin Baek,

Soyul Lee,

Hayeon Jo,

Hyesong Choi,

Dongbo Min; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Baek_2025_CVPR,
  author    = {Baek, Seungmin and Lee, Soyul and Jo, Hayeon and Choi, Hyesong and Min, Dongbo},
  title     = {{TADFormer}: Task-Adaptive Dynamic {TransFormer} for Efficient Multi-Task Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14858--14868}
}
A Unified Approach to Interpreting Self-supervised Pre-training Methods for 3D Point Clouds via Interactions: Qiang Li,

Jian Ruan,

Fanghao Wu,

Yuchi Chen,

Zhihua Wei,

Wen Shen; [pdf] [supp]
[bibtex]
% NOTE(review): key Li_2025_CVPR is reused by several other entries in this file; BibTeX requires unique keys, so disambiguate before citing.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Qiang and Ruan, Jian and Wu, Fanghao and Chen, Yuchi and Wei, Zhihua and Shen, Wen},
  title     = {A Unified Approach to Interpreting Self-supervised Pre-training Methods for {3D} Point Clouds via Interactions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27315--27324}
}
Enhancing SAM with Efficient Prompting and Preference Optimization for Semi-supervised Medical Image Segmentation: Aishik Konwer,

Zhijian Yang,

Erhan Bas,

Cao Xiao,

Prateek Prasanna,

Parminder Bhatia,

Taha Kass-Hout; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Konwer_2025_CVPR,
  author    = {Konwer, Aishik and Yang, Zhijian and Bas, Erhan and Xiao, Cao and Prasanna, Prateek and Bhatia, Parminder and Kass-Hout, Taha},
  title     = {Enhancing {SAM} with Efficient Prompting and Preference Optimization for Semi-supervised Medical Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20990--21000}
}
CamPoint: Boosting Point Cloud Segmentation with Virtual Camera: Jianhui Zhang,

Yizhi Luo,

Zicheng Zhang,

Xuecheng Nie,

Bonan Li; [pdf]
[bibtex]
% NOTE(review): key Zhang_2025_CVPR is reused by other entries in this file; BibTeX requires unique keys, so disambiguate before citing.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jianhui and Luo, Yizhi and Zhang, Zicheng and Nie, Xuecheng and Li, Bonan},
  title     = {{CamPoint}: Boosting Point Cloud Segmentation with Virtual Camera},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11822--11832}
}
LightLoc: Learning Outdoor LiDAR Localization at Light Speed: Wen Li,

Chen Liu,

Shangshu Yu,

Dunqiang Liu,

Yin Zhou,

Siqi Shen,

Chenglu Wen,

Cheng Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Li_2025_CVPR is reused by several other entries in this file; BibTeX requires unique keys, so disambiguate before citing.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Wen and Liu, Chen and Yu, Shangshu and Liu, Dunqiang and Zhou, Yin and Shen, Siqi and Wen, Chenglu and Wang, Cheng},
  title     = {{LightLoc}: Learning Outdoor {LiDAR} Localization at Light Speed},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6680--6689}
}
MERGE: Multi-faceted Hierarchical Graph-based GNN for Gene Expression Prediction from Whole Slide Histopathology Images: Aniruddha Ganguly,

Debolina Chatterjee,

Wentao Huang,

Jie Zhang,

Alisa Yurovsky,

Travis Steele Johnson,

Chao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ganguly_2025_CVPR,
  author    = {Ganguly, Aniruddha and Chatterjee, Debolina and Huang, Wentao and Zhang, Jie and Yurovsky, Alisa and Johnson, Travis Steele and Chen, Chao},
  title     = {{MERGE}: Multi-faceted Hierarchical Graph-based {GNN} for Gene Expression Prediction from Whole Slide Histopathology Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15611--15620}
}
Accurate Differential Operators for Hybrid Neural Fields: Aditya Chetan,

Guandao Yang,

Zichen Wang,

Steve Marschner,

Bharath Hariharan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chetan_2025_CVPR,
  author    = {Chetan, Aditya and Yang, Guandao and Wang, Zichen and Marschner, Steve and Hariharan, Bharath},
  title     = {Accurate Differential Operators for Hybrid Neural Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {530--539}
}
FeedEdit: Text-Based Image Editing with Dynamic Feedback Regulation: Fengyi Fu,

Lei Zhang,

Mengqi Huang,

Zhendong Mao; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Fengyi and Zhang, Lei and Huang, Mengqi and Mao, Zhendong},
  title     = {{FeedEdit}: Text-Based Image Editing with Dynamic Feedback Regulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2661--2670}
}
Classifier-guided CLIP Distillation for Unsupervised Multi-label Classification: Dongseob Kim,

Hyunjung Shim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Dongseob and Shim, Hyunjung},
  title     = {Classifier-guided {CLIP} Distillation for Unsupervised Multi-label Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4661--4671}
}
UMotion: Uncertainty-driven Human Motion Estimation from Inertial and Ultra-wideband Units: Huakun Liu,

Hiroki Ota,

Xin Wei,

Yutaro Hirao,

Monica Perusquia-Hernandez,

Hideaki Uchiyama,

Kiyoshi Kiyokawa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Huakun and Ota, Hiroki and Wei, Xin and Hirao, Yutaro and Perusquia-Hernandez, Monica and Uchiyama, Hideaki and Kiyokawa, Kiyoshi},
  title     = {{UMotion}: Uncertainty-driven Human Motion Estimation from Inertial and Ultra-wideband Units},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7085--7094}
}
STEREO: A Two-Stage Framework for Adversarially Robust Concept Erasing from Text-to-Image Diffusion Models: Koushik Srivatsan,

Fahad Shamshad,

Muzammal Naseer,

Vishal M. Patel,

Karthik Nandakumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Srivatsan_2025_CVPR,
  author    = {Srivatsan, Koushik and Shamshad, Fahad and Naseer, Muzammal and Patel, Vishal M. and Nandakumar, Karthik},
  title     = {{STEREO}: A Two-Stage Framework for Adversarially Robust Concept Erasing from Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23765--23774}
}
Scene Map-based Prompt Tuning for Navigation Instruction Generation: Sheng Fan,

Rui Liu,

Wenguan Wang,

Yi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Sheng and Liu, Rui and Wang, Wenguan and Yang, Yi},
  title     = {Scene Map-based Prompt Tuning for Navigation Instruction Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6898--6908}
}
GenVDM: Generating Vector Displacement Maps From a Single Image: Yuezhi Yang,

Qimin Chen,

Vladimir G. Kim,

Siddhartha Chaudhuri,

Qixing Huang,

Zhiqin Chen; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Yang_2025_CVPR is also used by the FFR entry in this file; BibTeX requires unique keys, so disambiguate before citing.
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Yuezhi and Chen, Qimin and Kim, Vladimir G. and Chaudhuri, Siddhartha and Huang, Qixing and Chen, Zhiqin},
  title     = {{GenVDM}: Generating Vector Displacement Maps From a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26618--26629}
}
DropoutGS: Dropping Out Gaussians for Better Sparse-view Rendering: Yexing Xu,

Longguang Wang,

Minglin Chen,

Sheng Ao,

Li Li,

Yulan Guo; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Xu_2025_CVPR is reused by several other entries in this file; BibTeX requires unique keys, so disambiguate before citing.
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Yexing and Wang, Longguang and Chen, Minglin and Ao, Sheng and Li, Li and Guo, Yulan},
  title     = {{DropoutGS}: Dropping Out {Gaussians} for Better Sparse-view Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {701--710}
}
Effective SAM Combination for Open-Vocabulary Semantic Segmentation: Minhyeok Lee,

Suhwan Cho,

Jungho Lee,

Sunghun Yang,

Heeseung Choi,

Ig-Jae Kim,

Sangyoun Lee; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Lee_2025_CVPR is also used by the DynScene entry near the top of this file; BibTeX requires unique keys, so disambiguate before citing.
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Minhyeok and Cho, Suhwan and Lee, Jungho and Yang, Sunghun and Choi, Heeseung and Kim, Ig-Jae and Lee, Sangyoun},
  title     = {Effective {SAM} Combination for Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26081--26090}
}
Bridging Modalities: Improving Universal Multimodal Retrieval by Multimodal Large Language Models: Xin Zhang,

Yanzhao Zhang,

Wen Xie,

Mingxin Li,

Ziqi Dai,

Dingkun Long,

Pengjun Xie,

Meishan Zhang,

Wenjie Li,

Min Zhang; [pdf] [supp]
[bibtex]
% NOTE(review): key Zhang_2025_CVPR is reused by other entries in this file; BibTeX requires unique keys, so disambiguate before citing.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xin and Zhang, Yanzhao and Xie, Wen and Li, Mingxin and Dai, Ziqi and Long, Dingkun and Xie, Pengjun and Zhang, Meishan and Li, Wenjie and Zhang, Min},
  title     = {Bridging Modalities: Improving Universal Multimodal Retrieval by Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9274--9285}
}
Enhancing Dataset Distillation via Non-Critical Region Refinement: Minh-Tuan Tran,

Trung Le,

Xuan-May Le,

Thanh-Toan Do,

Dinh Phung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2025_CVPR,
  author    = {Tran, Minh-Tuan and Le, Trung and Le, Xuan-May and Do, Thanh-Toan and Phung, Dinh},
  title     = {Enhancing Dataset Distillation via Non-Critical Region Refinement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10015--10024}
}
Towards Visual Discrimination and Reasoning of Real-World Physical Dynamics: Physics-Grounded Anomaly Detection: Wenqiao Li,

Yao Gu,

Xintao Chen,

Xiaohao Xu,

Ming Hu,

Xiaonan Huang,

Yingna Wu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Li_2025_CVPR is reused by several other entries in this file; BibTeX requires unique keys, so disambiguate before citing.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Wenqiao and Gu, Yao and Chen, Xintao and Xu, Xiaohao and Hu, Ming and Huang, Xiaonan and Wu, Yingna},
  title     = {Towards Visual Discrimination and Reasoning of Real-World Physical Dynamics: Physics-Grounded Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30409--30419}
}
SPA-VL: A Comprehensive Safety Preference Alignment Dataset for Vision Language Models: Yongting Zhang,

Lu Chen,

Guodong Zheng,

Yifeng Gao,

Rui Zheng,

Jinlan Fu,

Zhenfei Yin,

Senjie Jin,

Yu Qiao,

Xuanjing Huang,

Feng Zhao,

Tao Gui,

Jing Shao; [pdf] [supp]
[bibtex]
% NOTE(review): key Zhang_2025_CVPR is reused by other entries in this file; BibTeX requires unique keys, so disambiguate before citing.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yongting and Chen, Lu and Zheng, Guodong and Gao, Yifeng and Zheng, Rui and Fu, Jinlan and Yin, Zhenfei and Jin, Senjie and Qiao, Yu and Huang, Xuanjing and Zhao, Feng and Gui, Tao and Shao, Jing},
  title     = {{SPA-VL}: A Comprehensive Safety Preference Alignment Dataset for Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19867--19878}
}
PUP 3D-GS: Principled Uncertainty Pruning for 3D Gaussian Splatting: Alex Hanson,

Allen Tu,

Vasu Singla,

Mayuka Jayawardhana,

Matthias Zwicker,

Tom Goldstein; [pdf] [supp]
[bibtex]
@InProceedings{Hanson_2025_CVPR,
  author    = {Hanson, Alex and Tu, Allen and Singla, Vasu and Jayawardhana, Mayuka and Zwicker, Matthias and Goldstein, Tom},
  title     = {{PUP} {3D-GS}: Principled Uncertainty Pruning for {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5949--5958}
}
UniAP: Unifying Inter- and Intra-Layer Automatic Parallelism by Mixed Integer Quadratic Programming: Hao Lin,

Ke Wu,

Jie Li,

Jun Li,

Wu-Jun Li; [pdf] [supp]
[bibtex]
% NOTE(review): key Lin_2025_CVPR is also used by the DriveGEN entry in this file; BibTeX requires unique keys, so disambiguate before citing.
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Hao and Wu, Ke and Li, Jie and Li, Jun and Li, Wu-Jun},
  title     = {{UniAP}: Unifying Inter- and Intra-Layer Automatic Parallelism by Mixed Integer Quadratic Programming},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20947--20957}
}
PTDiffusion: Free Lunch for Generating Optical Illusion Hidden Pictures with Phase-Transferred Diffusion Model: Xiang Gao,

Shuai Yang,

Jiaying Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Gao_2025_CVPR is also used by the SUM Parts entry in this file; BibTeX requires unique keys, so disambiguate before citing.
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Xiang and Yang, Shuai and Liu, Jiaying},
  title     = {{PTDiffusion}: Free Lunch for Generating Optical Illusion Hidden Pictures with Phase-Transferred Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18240--18249}
}
ScribbleLight: Single Image Indoor Relighting with Scribbles: Jun Myeong Choi,

Annie Wang,

Pieter Peers,

Anand Bhattad,

Roni Sengupta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2025_CVPR,
  author    = {Choi, Jun Myeong and Wang, Annie and Peers, Pieter and Bhattad, Anand and Sengupta, Roni},
  title     = {{ScribbleLight}: Single Image Indoor Relighting with Scribbles},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5720--5731}
}
Preserving Clusters in Prompt Learning for Unsupervised Domain Adaptation: Tung-Long Vuong,

Hoang Phan,

Vy Vo,

Anh Bui,

Thanh-Toan Do,

Trung Le,

Dinh Phung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vuong_2025_CVPR,
  author    = {Vuong, Tung-Long and Phan, Hoang and Vo, Vy and Bui, Anh and Do, Thanh-Toan and Le, Trung and Phung, Dinh},
  title     = {Preserving Clusters in Prompt Learning for Unsupervised Domain Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19974--19984}
}
InsightEdit: Towards Better Instruction Following for Image Editing: Yingjing Xu,

Jie Kong,

Jiazhi Wang,

Xiao Pan,

Bo Lin,

Qiang Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Xu_2025_CVPR is reused by several other entries in this file; BibTeX requires unique keys, so disambiguate before citing.
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Yingjing and Kong, Jie and Wang, Jiazhi and Pan, Xiao and Lin, Bo and Liu, Qiang},
  title     = {{InsightEdit}: Towards Better Instruction Following for Image Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2694--2703}
}
Attend to Not Attended: Structure-then-Detail Token Merging for Post-training DiT Acceleration: Haipeng Fang,

Sheng Tang,

Juan Cao,

Enshuo Zhang,

Fan Tang,

Tong-Yee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Haipeng and Tang, Sheng and Cao, Juan and Zhang, Enshuo and Tang, Fan and Lee, Tong-Yee},
  title     = {Attend to Not Attended: Structure-then-Detail Token Merging for Post-training {DiT} Acceleration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18083--18092}
}
Turbo3D: Ultra-fast Text-to-3D Generation: Hanzhe Hu,

Tianwei Yin,

Fujun Luan,

Yiwei Hu,

Hao Tan,

Zexiang Xu,

Sai Bi,

Shubham Tulsiani,

Kai Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Hanzhe and Yin, Tianwei and Luan, Fujun and Hu, Yiwei and Tan, Hao and Xu, Zexiang and Bi, Sai and Tulsiani, Shubham and Zhang, Kai},
  title     = {{Turbo3D}: Ultra-fast Text-to-{3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23668--23678}
}
SUM Parts: Benchmarking Part-Level Semantic Segmentation of Urban Meshes: Weixiao Gao,

Liangliang Nan,

Hugo Ledoux; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
    author    = {Gao, Weixiao and Nan, Liangliang and Ledoux, Hugo},
    title     = {{SUM Parts}: Benchmarking Part-Level Semantic Segmentation of Urban Meshes},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {24474--24484}
}
One-for-More: Continual Diffusion Model for Anomaly Detection: Xiaofan Li,

Xin Tan,

Zhuo Chen,

Zhizhong Zhang,

Ruixin Zhang,

Rizen Guo,

Guanna Jiang,

Yulong Chen,

Yanyun Qu,

Lizhuang Ma,

Yuan Xie; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Xiaofan and Tan, Xin and Chen, Zhuo and Zhang, Zhizhong and Zhang, Ruixin and Guo, Rizen and Jiang, Guanna and Chen, Yulong and Qu, Yanyun and Ma, Lizhuang and Xie, Yuan},
    title     = {{One-for-More}: Continual Diffusion Model for Anomaly Detection},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {4766--4775}
}
MODA: Motion-Drift Augmentation for Inertial Human Motion Analysis: Yinghao Wu,

Shihui Guo,

Yipeng Qin; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
    author    = {Wu, Yinghao and Guo, Shihui and Qin, Yipeng},
    title     = {{MODA}: Motion-Drift Augmentation for Inertial Human Motion Analysis},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {27771--27781}
}
Higher-Order Ratio Cycles for Fast and Globally Optimal Shape Matching: Paul Roetzer,

Viktoria Ehm,

Daniel Cremers,

Zorah Lähner,

Florian Bernard; [pdf] [supp]
[bibtex]
@InProceedings{Roetzer_2025_CVPR,
    author    = {Roetzer, Paul and Ehm, Viktoria and Cremers, Daniel and L{\"a}hner, Zorah and Bernard, Florian},
    title     = {Higher-Order Ratio Cycles for Fast and Globally Optimal Shape Matching},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {21793--21803}
}
Omni-RGPT: Unifying Image and Video Region-level Understanding via Token Marks: Miran Heo,

Min-Hung Chen,

De-An Huang,

Sifei Liu,

Subhashree Radhakrishnan,

Seon Joo Kim,

Yu-Chiang Frank Wang,

Ryo Hachiuma; [pdf] [supp]
[bibtex]
@InProceedings{Heo_2025_CVPR,
    author    = {Heo, Miran and Chen, Min-Hung and Huang, De-An and Liu, Sifei and Radhakrishnan, Subhashree and Kim, Seon Joo and Wang, Yu-Chiang Frank and Hachiuma, Ryo},
    title     = {{Omni-RGPT}: Unifying Image and Video Region-level Understanding via Token Marks},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3919--3930}
}
Hyperdimensional Uncertainty Quantification for Multimodal Uncertainty Fusion in Autonomous Vehicles Perception: Luke Chen,

Junyao Wang,

Trier Mortlock,

Pramod Khargonekar,

Mohammad Abdullah Al Faruque; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Luke and Wang, Junyao and Mortlock, Trier and Khargonekar, Pramod and Al Faruque, Mohammad Abdullah},
    title     = {Hyperdimensional Uncertainty Quantification for Multimodal Uncertainty Fusion in Autonomous Vehicles Perception},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {22306--22316}
}
EDM: Equirectangular Projection-Oriented Dense Kernelized Feature Matching: Dongki Jung,

Jaehoon Choi,

Yonghan Lee,

Somi Jeong,

Taejae Lee,

Dinesh Manocha,

Suyong Yeon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2025_CVPR,
    author    = {Jung, Dongki and Choi, Jaehoon and Lee, Yonghan and Jeong, Somi and Lee, Taejae and Manocha, Dinesh and Yeon, Suyong},
    title     = {{EDM}: Equirectangular Projection-Oriented Dense Kernelized Feature Matching},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6337--6347}
}
EZSR: Event-based Zero-Shot Recognition: Yan Yang,

Liyuan Pan,

Dongxu Li,

Liu Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
    author    = {Yang, Yan and Pan, Liyuan and Li, Dongxu and Liu, Liu},
    title     = {{EZSR}: Event-based Zero-Shot Recognition},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {4628--4638}
}
FlowRAM: Grounding Flow Matching Policy with Region-Aware Mamba Framework for Robotic Manipulation: Sen Wang,

Le Wang,

Sanping Zhou,

Jingyi Tian,

Jiayi Li,

Haowen Sun,

Wei Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Sen and Wang, Le and Zhou, Sanping and Tian, Jingyi and Li, Jiayi and Sun, Haowen and Tang, Wei},
    title     = {{FlowRAM}: Grounding Flow Matching Policy with Region-Aware {Mamba} Framework for Robotic Manipulation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {12176--12186}
}
Visual Lexicon: Rich Image Features in Language Space: XuDong Wang,

Xingyi Zhou,

Alireza Fathi,

Trevor Darrell,

Cordelia Schmid; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, XuDong and Zhou, Xingyi and Fathi, Alireza and Darrell, Trevor and Schmid, Cordelia},
    title     = {Visual Lexicon: Rich Image Features in Language Space},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {19736--19747}
}
SVFR: A Unified Framework for Generalized Video Face Restoration: Zhiyao Wang,

Xu Chen,

Chengming Xu,

Junwei Zhu,

Xiaobin Hu,

Jiangning Zhang,

Chengjie Wang,

Yuqi Liu,

Yiyi Zhou,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Zhiyao and Chen, Xu and Xu, Chengming and Zhu, Junwei and Hu, Xiaobin and Zhang, Jiangning and Wang, Chengjie and Liu, Yuqi and Zhou, Yiyi and Ji, Rongrong},
    title     = {{SVFR}: A Unified Framework for Generalized Video Face Restoration},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {7406--7415}
}
Decoupling Fine Detail and Global Geometry for Compressed Depth Map Super-Resolution: Huan Zheng,

Wencheng Han,

Jianbing Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
    author    = {Zheng, Huan and Han, Wencheng and Shen, Jianbing},
    title     = {Decoupling Fine Detail and Global Geometry for Compressed Depth Map Super-Resolution},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {951--960}
}
Test-Time Visual In-Context Tuning: Jiahao Xie,

Alessio Tonioni,

Nathalie Rauschmayr,

Federico Tombari,

Bernt Schiele; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
    author    = {Xie, Jiahao and Tonioni, Alessio and Rauschmayr, Nathalie and Tombari, Federico and Schiele, Bernt},
    title     = {Test-Time Visual In-Context Tuning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {19996--20005}
}
Prior Does Matter: Visual Navigation via Denoising Diffusion Bridge Models: Hao Ren,

Yiming Zeng,

Zetong Bi,

Zhaoliang Wan,

Junlong Huang,

Hui Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_CVPR,
    author    = {Ren, Hao and Zeng, Yiming and Bi, Zetong and Wan, Zhaoliang and Huang, Junlong and Cheng, Hui},
    title     = {Prior Does Matter: Visual Navigation via Denoising Diffusion Bridge Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {12100--12110}
}
Digital Twin Catalog: A Large-Scale Photorealistic 3D Object Digital Twin Dataset: Zhao Dong,

Ka Chen,

Zhaoyang Lv,

Hong-Xing Yu,

Yunzhi Zhang,

Cheng Zhang,

Yufeng Zhu,

Stephen Tian,

Zhengqin Li,

Geordie Moffatt,

Sean Christofferson,

James Fort,

Xiaqing Pan,

Mingfei Yan,

Jiajun Wu,

Carl Yuheng Ren,

Richard Newcombe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_CVPR,
    author    = {Dong, Zhao and Chen, Ka and Lv, Zhaoyang and Yu, Hong-Xing and Zhang, Yunzhi and Zhang, Cheng and Zhu, Yufeng and Tian, Stephen and Li, Zhengqin and Moffatt, Geordie and Christofferson, Sean and Fort, James and Pan, Xiaqing and Yan, Mingfei and Wu, Jiajun and Ren, Carl Yuheng and Newcombe, Richard},
    title     = {Digital Twin Catalog: A Large-Scale Photorealistic {3D} Object Digital Twin Dataset},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {753--763}
}
SegEarth-OV: Towards Training-Free Open-Vocabulary Segmentation for Remote Sensing Images: Kaiyu Li,

Ruixun Liu,

Xiangyong Cao,

Xueru Bai,

Feng Zhou,

Deyu Meng,

Zhi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Kaiyu and Liu, Ruixun and Cao, Xiangyong and Bai, Xueru and Zhou, Feng and Meng, Deyu and Wang, Zhi},
    title     = {{SegEarth-OV}: Towards Training-Free Open-Vocabulary Segmentation for Remote Sensing Images},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {10545--10556}
}
MeshGen: Generating PBR Textured Mesh with Render-Enhanced Auto-Encoder and Generative Data Augmentation: Zilong Chen,

Yikai Wang,

Wenqiang Sun,

Feng Wang,

Yiwen Chen,

Huaping Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Zilong and Wang, Yikai and Sun, Wenqiang and Wang, Feng and Chen, Yiwen and Liu, Huaping},
    title     = {{MeshGen}: Generating {PBR} Textured Mesh with Render-Enhanced Auto-Encoder and Generative Data Augmentation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5835--5848}
}
GIFStream: 4D Gaussian-based Immersive Video with Feature Stream: Hao Li,

Sicheng Li,

Xiang Gao,

Abudouaihati Batuer,

Lu Yu,

Yiyi Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Hao and Li, Sicheng and Gao, Xiang and Batuer, Abudouaihati and Yu, Lu and Liao, Yiyi},
    title     = {{GIFStream}: {4D} {Gaussian}-based Immersive Video with Feature Stream},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {21761--21770}
}
DeClotH: Decomposable 3D Cloth and Human Body Reconstruction from a Single Image: Hyeongjin Nam,

Donghwan Kim,

Jeongtaek Oh,

Kyoung Mu Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2025_CVPR,
    author    = {Nam, Hyeongjin and Kim, Donghwan and Oh, Jeongtaek and Lee, Kyoung Mu},
    title     = {{DeClotH}: Decomposable {3D} Cloth and Human Body Reconstruction from a Single Image},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5636--5645}
}
Neuron: Learning Context-Aware Evolving Representations for Zero-Shot Skeleton Action Recognition: Yang Chen,

Jingcai Guo,

Song Guo,

Dacheng Tao; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Yang and Guo, Jingcai and Guo, Song and Tao, Dacheng},
    title     = {Neuron: Learning Context-Aware Evolving Representations for Zero-Shot Skeleton Action Recognition},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8721--8730}
}
Multi-Scale Neighborhood Occupancy Masked Autoencoder for Self-Supervised Learning in LiDAR Point Clouds: Mohamed Abdelsamad,

Michael Ulrich,

Claudius Glaeser,

Abhinav Valada; [pdf] [supp]
[bibtex]
@InProceedings{Abdelsamad_2025_CVPR,
    author    = {Abdelsamad, Mohamed and Ulrich, Michael and Glaeser, Claudius and Valada, Abhinav},
    title     = {Multi-Scale Neighborhood Occupancy Masked Autoencoder for Self-Supervised Learning in {LiDAR} Point Clouds},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {22234--22243}
}
Do We Really Need Curated Malicious Data for Safety Alignment in Multi-modal Large Language Models?: Yanbo Wang,

Jiyang Guan,

Jian Liang,

Ran He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Yanbo and Guan, Jiyang and Liang, Jian and He, Ran},
    title     = {Do We Really Need Curated Malicious Data for Safety Alignment in Multi-modal Large Language Models?},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {19879--19889}
}
High-Fidelity Relightable Monocular Portrait Animation with Lighting-Controllable Video Diffusion Model: Mingtao Guo,

Guanyu Xing,

Yanli Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
    author    = {Guo, Mingtao and Xing, Guanyu and Liu, Yanli},
    title     = {High-Fidelity Relightable Monocular Portrait Animation with Lighting-Controllable Video Diffusion Model},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {228--238}
}
Plug-and-Play PPO: An Adaptive Point Prompt Optimizer Making SAM Greater: Xueyu Liu,

Rui Wang,

Yexin Lai,

Guangze Shi,

Feixue Shao,

Fang Hao,

Jianan Zhang,

Jia Shen,

Yongfei Wu,

Wen Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Xueyu and Wang, Rui and Lai, Yexin and Shi, Guangze and Shao, Feixue and Hao, Fang and Zhang, Jianan and Shen, Jia and Wu, Yongfei and Zheng, Wen},
    title     = {Plug-and-Play {PPO}: An Adaptive Point Prompt Optimizer Making {SAM} Greater},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {4332--4342}
}
Harnessing Global-Local Collaborative Adversarial Perturbation for Anti-Customization: Long Xu,

Jiakai Wang,

Haojie Hao,

Haotong Qin,

Jiejie Zhao,

Xianglong Liu; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
    author    = {Xu, Long and Wang, Jiakai and Hao, Haojie and Qin, Haotong and Zhao, Jiejie and Liu, Xianglong},
    title     = {Harnessing Global-Local Collaborative Adversarial Perturbation for Anti-Customization},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {13414--13423}
}
EchoONE: Segmenting Multiple Echocardiography Planes in One Model: Jiongtong Hu,

Wufeng Xue,

Jun Cheng,

Yingying Liu,

Wei Zhuo,

Dong Ni; [pdf] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR,
    author    = {Hu, Jiongtong and Xue, Wufeng and Cheng, Jun and Liu, Yingying and Zhuo, Wei and Ni, Dong},
    title     = {{EchoONE}: Segmenting Multiple Echocardiography Planes in One Model},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5207--5216}
}
Acc3D: Accelerating Single Image to 3D Diffusion Models via Edge Consistency Guided Score Distillation: Kendong Liu,

Zhiyu Zhu,

Hui Liu,

Junhui Hou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Kendong and Zhu, Zhiyu and Liu, Hui and Hou, Junhui},
    title     = {{Acc3D}: Accelerating Single Image to {3D} Diffusion Models via Edge Consistency Guided Score Distillation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {18031--18040}
}
EasyHOI: Unleashing the Power of Large Models for Reconstructing Hand-Object Interactions in the Wild: Yumeng Liu,

Xiaoxiao Long,

Zemin Yang,

Yuan Liu,

Marc Habermann,

Christian Theobalt,

Yuexin Ma,

Wenping Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Yumeng and Long, Xiaoxiao and Yang, Zemin and Liu, Yuan and Habermann, Marc and Theobalt, Christian and Ma, Yuexin and Wang, Wenping},
    title     = {{EasyHOI}: Unleashing the Power of Large Models for Reconstructing Hand-Object Interactions in the Wild},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {7037--7047}
}
PLeaS - Merging Models with Permutations and Least Squares: Anshul Nasery,

Jonathan Hayase,

Pang Wei Koh,

Sewoong Oh; [pdf] [supp]
[bibtex]
@InProceedings{Nasery_2025_CVPR,
    author    = {Nasery, Anshul and Hayase, Jonathan and Koh, Pang Wei and Oh, Sewoong},
    title     = {{PLeaS} - Merging Models with Permutations and Least Squares},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {30493--30502}
}
Incremental Object Keypoint Learning: Mingfu Liang,

Jiahuan Zhou,

Xu Zou,

Ying Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
    author    = {Liang, Mingfu and Zhou, Jiahuan and Zou, Xu and Wu, Ying},
    title     = {Incremental Object Keypoint Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {25399--25410}
}
Soft Self-labeling and Potts Relaxations for Weakly-supervised Segmentation: Zhongwen Zhang,

Yuri Boykov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Zhongwen and Boykov, Yuri},
    title     = {Soft Self-labeling and {Potts} Relaxations for Weakly-supervised Segmentation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {20244--20253}
}
MVSAnywhere: Zero-Shot Multi-View Stereo: Sergio Izquierdo,

Mohamed Sayed,

Michael Firman,

Guillermo Garcia-Hernando,

Daniyar Turmukhambetov,

Javier Civera,

Oisin Mac Aodha,

Gabriel Brostow,

Jamie Watson; [pdf] [arXiv]
[bibtex]
@InProceedings{Izquierdo_2025_CVPR,
    author    = {Izquierdo, Sergio and Sayed, Mohamed and Firman, Michael and Garcia-Hernando, Guillermo and Turmukhambetov, Daniyar and Civera, Javier and Mac Aodha, Oisin and Brostow, Gabriel and Watson, Jamie},
    title     = {{MVSAnywhere}: Zero-Shot Multi-View Stereo},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {11493--11504}
}
InteractVLM: 3D Interaction Reasoning from 2D Foundational Models: Sai Kumar Dwivedi,

Dimitrije Antić,

Shashank Tripathi,

Omid Taheri,

Cordelia Schmid,

Michael J. Black,

Dimitrios Tzionas; [pdf] [supp]
[bibtex]
@InProceedings{Dwivedi_2025_CVPR,
    author    = {Dwivedi, Sai Kumar and Anti{\'c}, Dimitrije and Tripathi, Shashank and Taheri, Omid and Schmid, Cordelia and Black, Michael J. and Tzionas, Dimitrios},
    title     = {{InteractVLM}: {3D} Interaction Reasoning from {2D} Foundational Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {22605--22615}
}
Patch Matters: Training-free Fine-grained Image Caption Enhancement via Local Perception: Ruotian Peng,

Haiying He,

Yake Wei,

Yandong Wen,

Di Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_CVPR,
    author    = {Peng, Ruotian and He, Haiying and Wei, Yake and Wen, Yandong and Hu, Di},
    title     = {Patch Matters: Training-free Fine-grained Image Caption Enhancement via Local Perception},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3963--3973}
}
Attribute-Missing Multi-view Graph Clustering: Bowen Zhao,

Qianqian Wang,

Zhengming Ding,

Quanxue Gao; [pdf]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
    author    = {Zhao, Bowen and Wang, Qianqian and Ding, Zhengming and Gao, Quanxue},
    title     = {Attribute-Missing Multi-view Graph Clustering},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {25832--25841}
}
Generating 6DoF Object Manipulation Trajectories from Action Description in Egocentric Vision: Tomoya Yoshida,

Shuhei Kurita,

Taichi Nishimura,

Shinsuke Mori; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoshida_2025_CVPR,
    author    = {Yoshida, Tomoya and Kurita, Shuhei and Nishimura, Taichi and Mori, Shinsuke},
    title     = {Generating {6DoF} Object Manipulation Trajectories from Action Description in Egocentric Vision},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {17370--17382}
}
Pose-Guided Temporal Enhancement for Robust Low-Resolution Hand Reconstruction: Kaixin Fan,

Pengfei Ren,

Jingyu Wang,

Haifeng Sun,

Qi Qi,

Zirui Zhuang,

Jianxin Liao; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2025_CVPR,
    author    = {Fan, Kaixin and Ren, Pengfei and Wang, Jingyu and Sun, Haifeng and Qi, Qi and Zhuang, Zirui and Liao, Jianxin},
    title     = {Pose-Guided Temporal Enhancement for Robust Low-Resolution Hand Reconstruction},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {22627--22637}
}
ReDiffDet: Rotation-equivariant Diffusion Model for Oriented Object Detection: Jiaqi Zhao,

Zeyu Ding,

Yong Zhou,

Hancheng Zhu,

Wen-Liang Du,

Rui Yao; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
    author    = {Zhao, Jiaqi and Ding, Zeyu and Zhou, Yong and Zhu, Hancheng and Du, Wen-Liang and Yao, Rui},
    title     = {{ReDiffDet}: Rotation-equivariant Diffusion Model for Oriented Object Detection},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {24429--24439}
}
PosterO: Structuring Layout Trees to Enable Language Models in Generalized Content-Aware Layout Generation: HsiaoYuan Hsu,

Yuxin Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hsu_2025_CVPR,
    author    = {Hsu, HsiaoYuan and Peng, Yuxin},
    title     = {{PosterO}: Structuring Layout Trees to Enable Language Models in Generalized Content-Aware Layout Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8117--8127}
}
BIOMEDICA: An Open Biomedical Image-Caption Archive, Dataset, and Vision-Language Models Derived from Scientific Literature: Alejandro Lozano,

Min Woo Sun,

James Burgess,

Liangyu Chen,

Jeffrey J. Nirschl,

Jeffrey Gu,

Ivan Lopez,

Josiah Aklilu,

Anita Rau,

Austin Wolfgang Katzer,

Yuhui Zhang,

Collin Chiu,

Xiaohan Wang,

Alfred Seunghoon Song,

Robert Tibshirani,

Serena Yeung-Levy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lozano_2025_CVPR,
    author    = {Lozano, Alejandro and Sun, Min Woo and Burgess, James and Chen, Liangyu and Nirschl, Jeffrey J. and Gu, Jeffrey and Lopez, Ivan and Aklilu, Josiah and Rau, Anita and Katzer, Austin Wolfgang and Zhang, Yuhui and Chiu, Collin and Wang, Xiaohan and Song, Alfred Seunghoon and Tibshirani, Robert and Yeung-Levy, Serena},
    title     = {{BIOMEDICA}: An Open Biomedical Image-Caption Archive, Dataset, and Vision-Language Models Derived from Scientific Literature},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {19724--19735}
}
Unlocking Generalization Power in LiDAR Point Cloud Registration: Zhenxuan Zeng,

Qiao Wu,

Xiyu Zhang,

Lin Yuanbo Wu,

Pei An,

Jiaqi Yang,

Ji Wang,

Peng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_CVPR,
    author    = {Zeng, Zhenxuan and Wu, Qiao and Zhang, Xiyu and Wu, Lin Yuanbo and An, Pei and Yang, Jiaqi and Wang, Ji and Wang, Peng},
    title     = {Unlocking Generalization Power in {LiDAR} Point Cloud Registration},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {22244--22253}
}
Structure-Aware Correspondence Learning for Relative Pose Estimation: Yihan Chen,

Wenfei Yang,

Huan Ren,

Shifeng Zhang,

Tianzhu Zhang,

Feng Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Yihan and Yang, Wenfei and Ren, Huan and Zhang, Shifeng and Zhang, Tianzhu and Wu, Feng},
    title     = {Structure-Aware Correspondence Learning for Relative Pose Estimation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {11611--11621}
}
LoRA Recycle: Unlocking Tuning-Free Few-Shot Adaptability in Visual Foundation Models by Recycling Pre-Tuned LoRAs: Zixuan Hu,

Yongxian Wei,

Li Shen,

Chun Yuan,

Dacheng Tao; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2025_CVPR,
    author    = {Hu, Zixuan and Wei, Yongxian and Shen, Li and Yuan, Chun and Tao, Dacheng},
    title     = {{LoRA Recycle}: Unlocking Tuning-Free Few-Shot Adaptability in Visual Foundation Models by Recycling Pre-Tuned {LoRAs}},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {25026--25037}
}
One2Any: One-Reference 6D Pose Estimation for Any Object: Mengya Liu,

Siyuan Li,

Ajad Chhatkuli,

Prune Truong,

Luc Van Gool,

Federico Tombari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Mengya and Li, Siyuan and Chhatkuli, Ajad and Truong, Prune and Van Gool, Luc and Tombari, Federico},
    title     = {{One2Any}: One-Reference {6D} Pose Estimation for Any Object},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6457--6467}
}
PyTorchGeoNodes: Enabling Differentiable Shape Programs for 3D Shape Reconstruction: Sinisa Stekovic,

Arslan Artykov,

Stefan Ainetter,

Mattia D'Urso,

Friedrich Fraundorfer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Stekovic_2025_CVPR,
    author    = {Stekovic, Sinisa and Artykov, Arslan and Ainetter, Stefan and D'Urso, Mattia and Fraundorfer, Friedrich},
    title     = {{PyTorchGeoNodes}: Enabling Differentiable Shape Programs for {3D} Shape Reconstruction},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {16283--16292}
}
Contextual AD Narration with Interleaved Multimodal Sequence: Hanlin Wang,

Zhan Tong,

Kecheng Zheng,

Yujun Shen,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Hanlin and Tong, Zhan and Zheng, Kecheng and Shen, Yujun and Wang, Limin},
    title     = {Contextual {AD} Narration with Interleaved Multimodal Sequence},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {8372--8383}
}
FIFA: Fine-grained Inter-frame Attention for Driver's Video Gaze Estimation: Daosong Hu,

Mingyue Cui,

Kai Huang; [pdf]
[bibtex]
@InProceedings{Hu_2025_CVPR,
    author    = {Hu, Daosong and Cui, Mingyue and Huang, Kai},
    title     = {{FIFA}: Fine-grained Inter-frame Attention for Driver's Video Gaze Estimation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {18760--18769}
}
MNE-SLAM: Multi-Agent Neural SLAM for Mobile Robots: Tianchen Deng,

Guole Shen,

Chen Xun,

Shenghai Yuan,

Tongxin Jin,

Hongming Shen,

Yanbo Wang,

Jingchuan Wang,

Hesheng Wang,

Danwei Wang,

Weidong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2025_CVPR,
    author    = {Deng, Tianchen and Shen, Guole and Xun, Chen and Yuan, Shenghai and Jin, Tongxin and Shen, Hongming and Wang, Yanbo and Wang, Jingchuan and Wang, Hesheng and Wang, Danwei and Chen, Weidong},
    title     = {{MNE-SLAM}: Multi-Agent Neural {SLAM} for Mobile Robots},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1485--1494}
}
TensoFlow: Tensorial Flow-based Sampler for Inverse Rendering: Chun Gu,

Xiaofei Wei,

Li Zhang,

Xiatian Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2025_CVPR,
    author    = {Gu, Chun and Wei, Xiaofei and Zhang, Li and Zhu, Xiatian},
    title     = {{TensoFlow}: Tensorial Flow-based Sampler for Inverse Rendering},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {495--504}
}
FRAMES-VQA: Benchmarking Fine-Tuning Robustness across Multi-Modal Shifts in Visual Question Answering: Chengyue Huang,

Brisa Maneechotesuwan,

Shivang Chopra,

Zsolt Kira; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
    author    = {Huang, Chengyue and Maneechotesuwan, Brisa and Chopra, Shivang and Kira, Zsolt},
    title     = {{FRAMES-VQA}: Benchmarking Fine-Tuning Robustness across Multi-Modal Shifts in Visual Question Answering},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3909--3918}
}
Shape Abstraction via Marching Differentiable Support Functions: Sunkyung Park,

Jeongmin Lee,

Dongjun Lee; [pdf] [supp]
[bibtex]
@InProceedings{Park_2025_CVPR,
    author    = {Park, Sunkyung and Lee, Jeongmin and Lee, Dongjun},
    title     = {Shape Abstraction via Marching Differentiable Support Functions},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {16902--16911}
}
LSceneLLM: Enhancing Large 3D Scene Understanding Using Adaptive Visual Preferences: Hongyan Zhi,

Peihao Chen,

Junyan Li,

Shuailei Ma,

Xinyu Sun,

Tianhang Xiang,

Yinjie Lei,

Mingkui Tan,

Chuang Gan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhi_2025_CVPR,
    author    = {Zhi, Hongyan and Chen, Peihao and Li, Junyan and Ma, Shuailei and Sun, Xinyu and Xiang, Tianhang and Lei, Yinjie and Tan, Mingkui and Gan, Chuang},
    title     = {{LSceneLLM}: Enhancing Large {3D} Scene Understanding Using Adaptive Visual Preferences},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {3761--3771}
}
Event Fields: Capturing Light Fields at High Speed, Resolution, and Dynamic Range: Ziyuan Qu,

Zihao Zou,

Vivek Boominathan,

Praneeth Chakravarthula,

Adithya Pediredla; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_CVPR,
    author    = {Qu, Ziyuan and Zou, Zihao and Boominathan, Vivek and Chakravarthula, Praneeth and Pediredla, Adithya},
    title     = {Event Fields: Capturing Light Fields at High Speed, Resolution, and Dynamic Range},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {26910--26920}
}
HyperFree: A Channel-adaptive and Tuning-free Foundation Model for Hyperspectral Remote Sensing Imagery: Jingtao Li,

Yingyi Liu,

Xinyu Wang,

Yunning Peng,

Chen Sun,

Shaoyu Wang,

Zhendong Sun,

Tian Ke,

Xiao Jiang,

Tangwei Lu,

Anran Zhao,

Yanfei Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Jingtao and Liu, Yingyi and Wang, Xinyu and Peng, Yunning and Sun, Chen and Wang, Shaoyu and Sun, Zhendong and Ke, Tian and Jiang, Xiao and Lu, Tangwei and Zhao, Anran and Zhong, Yanfei},
    title     = {{HyperFree}: A Channel-adaptive and Tuning-free Foundation Model for Hyperspectral Remote Sensing Imagery},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {23048--23058}
}
Exploring Temporally-Aware Features for Point Tracking: Inès Hyeonsu Kim,

Seokju Cho,

Jiahui Huang,

Jung Yi,

Joon-Young Lee,

Seungryong Kim; [pdf] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
    author    = {Kim, In{\`e}s Hyeonsu and Cho, Seokju and Huang, Jiahui and Yi, Jung and Lee, Joon-Young and Kim, Seungryong},
    title     = {Exploring Temporally-Aware Features for Point Tracking},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1962--1972}
}
GBlobs: Explicit Local Structure via Gaussian Blobs for Improved Cross-Domain LiDAR-based 3D Object Detection: Dušan Malić,

Christian Fruhwirth-Reisinger,

Samuel Schulter,

Horst Possegger; [pdf] [supp]
[bibtex]
@InProceedings{Malic_2025_CVPR,
  author    = {Mali{\'c}, Du{\v{s}}an and Fruhwirth-Reisinger, Christian and Schulter, Samuel and Possegger, Horst},
  title     = {{GBlobs}: Explicit Local Structure via {Gaussian} Blobs for Improved Cross-Domain {LiDAR}-based {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27357--27367}
}
V^2Dial: Unification of Video and Visual Dialog via Multimodal Experts: Adnen Abdessaied,

Anna Rohrbach,

Marcus Rohrbach,

Andreas Bulling; [pdf] [supp]
[bibtex]
@InProceedings{Abdessaied_2025_CVPR,
  author    = {Abdessaied, Adnen and Rohrbach, Anna and Rohrbach, Marcus and Bulling, Andreas},
  title     = {{V$^{2}$Dial}: Unification of Video and Visual Dialog via Multimodal Experts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8637--8647}
}
Detail-Preserving Latent Diffusion for Stable Shadow Removal: Jiamin Xu,

Yuxin Zheng,

Zelong Li,

Chi Wang,

Renshu Gu,

Weiwei Xu,

Gang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Jiamin and Zheng, Yuxin and Li, Zelong and Wang, Chi and Gu, Renshu and Xu, Weiwei and Xu, Gang},
  title     = {Detail-Preserving Latent Diffusion for Stable Shadow Removal},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7592--7602}
}
Scaling Down Text Encoders of Text-to-Image Diffusion Models: Lifu Wang,

Daqing Liu,

Xinchen Liu,

Xiaodong He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Lifu and Liu, Daqing and Liu, Xinchen and He, Xiaodong},
  title     = {Scaling Down Text Encoders of Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18424--18433}
}
3D Gaussian Head Avatars with Expressive Dynamic Appearances by Compact Tensorial Representations: Yating Wang,

Xuan Wang,

Ran Yi,

Yanbo Fan,

Jichen Hu,

Jingcheng Zhu,

Lizhuang Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yating and Wang, Xuan and Yi, Ran and Fan, Yanbo and Hu, Jichen and Zhu, Jingcheng and Ma, Lizhuang},
  title     = {{3D} {Gaussian} Head Avatars with Expressive Dynamic Appearances by Compact Tensorial Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21117--21126}
}
MambaIRv2: Attentive State Space Restoration: Hang Guo,

Yong Guo,

Yaohua Zha,

Yulun Zhang,

Wenbo Li,

Tao Dai,

Shu-Tao Xia,

Yawei Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Hang and Guo, Yong and Zha, Yaohua and Zhang, Yulun and Li, Wenbo and Dai, Tao and Xia, Shu-Tao and Li, Yawei},
  title     = {{MambaIRv2}: Attentive State Space Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28124--28133}
}
Floating No More: Object-Ground Reconstruction from a Single Image: Yunze Man,

Yichen Sheng,

Jianming Zhang,

Liang-Yan Gui,

Yu-Xiong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Man_2025_CVPR,
  author    = {Man, Yunze and Sheng, Yichen and Zhang, Jianming and Gui, Liang-Yan and Wang, Yu-Xiong},
  title     = {Floating No More: Object-Ground Reconstruction from a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27134--27143}
}
POT: Prototypical Optimal Transport for Weakly Supervised Semantic Segmentation: Jian Wang,

Tianhong Dai,

Bingfeng Zhang,

Siyue Yu,

Eng Gee Lim,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jian and Dai, Tianhong and Zhang, Bingfeng and Yu, Siyue and Lim, Eng Gee and Xiao, Jimin},
  title     = {{POT}: Prototypical Optimal Transport for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15055--15064}
}
CrossOver: 3D Scene Cross-Modal Alignment: Sayan Deb Sarkar,

Ondrej Miksik,

Marc Pollefeys,

Daniel Barath,

Iro Armeni; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sarkar_2025_CVPR,
  author    = {Sarkar, Sayan Deb and Miksik, Ondrej and Pollefeys, Marc and Barath, Daniel and Armeni, Iro},
  title     = {{CrossOver}: {3D} Scene Cross-Modal Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8985--8994}
}
Rethinking Temporal Fusion with a Unified Gradient Descent View for 3D Semantic Occupancy Prediction: Dubing Chen,

Huan Zheng,

Jin Fang,

Xingping Dong,

Xianfei Li,

Wenlong Liao,

Tao He,

Pai Peng,

Jianbing Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Dubing and Zheng, Huan and Fang, Jin and Dong, Xingping and Li, Xianfei and Liao, Wenlong and He, Tao and Peng, Pai and Shen, Jianbing},
  title     = {Rethinking Temporal Fusion with a Unified Gradient Descent View for {3D} Semantic Occupancy Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1505--1515}
}
SKE-Layout: Spatial Knowledge Enhanced Layout Generation with LLMs: Junsheng Wang,

Nieqing Cao,

Yan Ding,

Mengying Xie,

Fuqiang Gu,

Chao Chen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Junsheng and Cao, Nieqing and Ding, Yan and Xie, Mengying and Gu, Fuqiang and Chen, Chao},
  title     = {{SKE-Layout}: Spatial Knowledge Enhanced Layout Generation with {LLMs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19414--19423}
}
Gaussian Eigen Models for Human Heads: Wojciech Zielonka,

Timo Bolkart,

Thabo Beeler,

Justus Thies; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zielonka_2025_CVPR,
  author    = {Zielonka, Wojciech and Bolkart, Timo and Beeler, Thabo and Thies, Justus},
  title     = {{Gaussian} Eigen Models for Human Heads},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15930--15940}
}
Scalable Video-to-Dataset Generation for Cross-Platform Mobile Agents: Yunseok Jang,

Yeda Song,

Sungryull Sohn,

Lajanugen Logeswaran,

Tiange Luo,

Dong-Ki Kim,

Kyunghoon Bae,

Honglak Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2025_CVPR,
  author    = {Jang, Yunseok and Song, Yeda and Sohn, Sungryull and Logeswaran, Lajanugen and Luo, Tiange and Kim, Dong-Ki and Bae, Kyunghoon and Lee, Honglak},
  title     = {Scalable Video-to-Dataset Generation for Cross-Platform Mobile Agents},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8604--8614}
}
Pattern Analogies: Learning to Perform Programmatic Image Edits by Analogy: Aditya Ganeshan,

Thibault Groueix,

Paul Guerrero,

Radomir Mech,

Matthew Fisher,

Daniel Ritchie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ganeshan_2025_CVPR,
  author    = {Ganeshan, Aditya and Groueix, Thibault and Guerrero, Paul and Mech, Radomir and Fisher, Matthew and Ritchie, Daniel},
  title     = {Pattern Analogies: Learning to Perform Programmatic Image Edits by Analogy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28715--28725}
}
4D-Fly: Fast 4D Reconstruction from a Single Monocular Video: Diankun Wu,

Fangfu Liu,

Yi-Hsin Hung,

Yue Qian,

Xiaohang Zhan,

Yueqi Duan; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Diankun and Liu, Fangfu and Hung, Yi-Hsin and Qian, Yue and Zhan, Xiaohang and Duan, Yueqi},
  title     = {{4D-Fly}: Fast {4D} Reconstruction from a Single Monocular Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16663--16673}
}
STAR-Edge: Structure-aware Local Spherical Curve Representation for Thin-walled Edge Extraction from Unstructured Point Clouds: Zikuan Li,

Honghua Chen,

Yuecheng Wang,

Sibo Wu,

Mingqiang Wei,

Jun Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zikuan and Chen, Honghua and Wang, Yuecheng and Wu, Sibo and Wei, Mingqiang and Wang, Jun},
  title     = {{STAR-Edge}: Structure-aware Local Spherical Curve Representation for Thin-walled Edge Extraction from Unstructured Point Clouds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27254--27263}
}
Tokenize Image Patches: Global Context Fusion for Effective Haze Removal in Large Images: Jiuchen Chen,

Xinyu Yan,

Qizhi Xu,

Kaiqi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jiuchen and Yan, Xinyu and Xu, Qizhi and Li, Kaiqi},
  title     = {Tokenize Image Patches: Global Context Fusion for Effective Haze Removal in Large Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2258--2268}
}
Complementary Advantages: Exploiting Cross-Field Frequency Correlation for NIR-Assisted Image Denoising: Yuchen Wang,

Hongyuan Wang,

Lizhi Wang,

Xin Wang,

Lin Zhu,

Wanxuan Lu,

Hua Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuchen and Wang, Hongyuan and Wang, Lizhi and Wang, Xin and Zhu, Lin and Lu, Wanxuan and Huang, Hua},
  title     = {Complementary Advantages: Exploiting Cross-Field Frequency Correlation for {NIR}-Assisted Image Denoising},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12679--12689}
}
Eval3D: Interpretable and Fine-grained Evaluation for 3D Generation: Shivam Duggal,

Yushi Hu,

Oscar Michel,

Aniruddha Kembhavi,

William T. Freeman,

Noah A. Smith,

Ranjay Krishna,

Antonio Torralba,

Ali Farhadi,

Wei-Chiu Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Duggal_2025_CVPR,
  author    = {Duggal, Shivam and Hu, Yushi and Michel, Oscar and Kembhavi, Aniruddha and Freeman, William T. and Smith, Noah A. and Krishna, Ranjay and Torralba, Antonio and Farhadi, Ali and Ma, Wei-Chiu},
  title     = {{Eval3D}: Interpretable and Fine-grained Evaluation for {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13326--13336}
}
Boosting the Dual-Stream Architecture in Ultra-High Resolution Segmentation with Resolution-Biased Uncertainty Estimation: Rong Qin,

Xingyu Liu,

Jinglei Shi,

Liang Lin,

Jufeng Yang; [pdf]
[bibtex]
@InProceedings{Qin_2025_CVPR,
  author    = {Qin, Rong and Liu, Xingyu and Shi, Jinglei and Lin, Liang and Yang, Jufeng},
  title     = {Boosting the Dual-Stream Architecture in Ultra-High Resolution Segmentation with Resolution-Biased Uncertainty Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25960--25970}
}
DiffLO: Semantic-Aware LiDAR Odometry with Diffusion-Based Refinement: Yongshu Huang,

Chen Liu,

Minghang Zhu,

Sheng Ao,

Chenglu Wen,

Cheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Yongshu and Liu, Chen and Zhu, Minghang and Ao, Sheng and Wen, Chenglu and Wang, Cheng},
  title     = {{DiffLO}: Semantic-Aware {LiDAR} Odometry with Diffusion-Based Refinement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17050--17059}
}
pFedMxF: Personalized Federated Class-Incremental Learning with Mixture of Frequency Aggregation: Yifei Zhang,

Hao Zhu,

Alysa Ziying Tan,

Dianzhi Yu,

Longtao Huang,

Han Yu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yifei and Zhu, Hao and Tan, Alysa Ziying and Yu, Dianzhi and Huang, Longtao and Yu, Han},
  title     = {{pFedMxF}: Personalized Federated Class-Incremental Learning with Mixture of Frequency Aggregation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30640--30650}
}
Style-Editor: Text-driven Object-centric Style Editing: Jihun Park,

Jongmin Gim,

Kyoungmin Lee,

Seunghun Lee,

Sunghoon Im; [pdf] [supp]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Jihun and Gim, Jongmin and Lee, Kyoungmin and Lee, Seunghun and Im, Sunghoon},
  title     = {{Style-Editor}: Text-driven Object-centric Style Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18281--18291}
}
Transfer Your Perspective: Controllable 3D Generation from Any Viewpoint in a Driving Scene: Tai-Yu Pan,

Sooyoung Jeon,

Mengdi Fan,

Jinsu Yoo,

Zhenyang Feng,

Mark Campbell,

Kilian Q. Weinberger,

Bharath Hariharan,

Wei-Lun Chao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2025_CVPR,
  author    = {Pan, Tai-Yu and Jeon, Sooyoung and Fan, Mengdi and Yoo, Jinsu and Feng, Zhenyang and Campbell, Mark and Weinberger, Kilian Q. and Hariharan, Bharath and Chao, Wei-Lun},
  title     = {Transfer Your Perspective: Controllable {3D} Generation from Any Viewpoint in a Driving Scene},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12027--12036}
}
Efficient Transfer Learning for Video-language Foundation Models: Haoxing Chen,

Zizheng Huang,

Yan Hong,

Yanshuo Wang,

Zhongcai Lyu,

Zhuoer Xu,

Jun Lan,

Zhangxuan Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Haoxing and Huang, Zizheng and Hong, Yan and Wang, Yanshuo and Lyu, Zhongcai and Xu, Zhuoer and Lan, Jun and Gu, Zhangxuan},
  title     = {Efficient Transfer Learning for Video-language Foundation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29129--29138}
}
Radio Frequency Ray Tracing with Neural Object Representation for Enhanced RF Modeling: Xingyu Chen,

Zihao Feng,

Kun Qian,

Xinyu Zhang; [pdf]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Xingyu and Feng, Zihao and Qian, Kun and Zhang, Xinyu},
  title     = {Radio Frequency Ray Tracing with Neural Object Representation for Enhanced {RF} Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21339--21348}
}
ANNEXE: Unified Analyzing, Answering, and Pixel Grounding for Egocentric Interaction: Yuejiao Su,

Yi Wang,

Qiongyang Hu,

Chuang Yang,

Lap-Pui Chau; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2025_CVPR,
  author    = {Su, Yuejiao and Wang, Yi and Hu, Qiongyang and Yang, Chuang and Chau, Lap-Pui},
  title     = {{ANNEXE}: Unified Analyzing, Answering, and Pixel Grounding for Egocentric Interaction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9027--9038}
}
MET3R: Measuring Multi-View Consistency in Generated Images: Mohammad Asim,

Christopher Wewer,

Thomas Wimmer,

Bernt Schiele,

Jan Eric Lenssen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Asim_2025_CVPR,
  author    = {Asim, Mohammad and Wewer, Christopher and Wimmer, Thomas and Schiele, Bernt and Lenssen, Jan Eric},
  title     = {{MET3R}: Measuring Multi-View Consistency in Generated Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6034--6044}
}
Segmenting Maxillofacial Structures in CBCT Volumes: Federico Bolelli,

Kevin Marchesini,

Niels van Nistelrooij,

Luca Lumetti,

Vittorio Pipoli,

Elisa Ficarra,

Shankeeth Vinayahalingam,

Costantino Grana; [pdf]
[bibtex]
@InProceedings{Bolelli_2025_CVPR,
  author    = {Bolelli, Federico and Marchesini, Kevin and van Nistelrooij, Niels and Lumetti, Luca and Pipoli, Vittorio and Ficarra, Elisa and Vinayahalingam, Shankeeth and Grana, Costantino},
  title     = {Segmenting Maxillofacial Structures in {CBCT} Volumes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5238--5248}
}
3D Dental Model Segmentation with Geometrical Boundary Preserving: Shufan Xi,

Zexian Liu,

Junlin Chang,

Hongyu Wu,

Xiaogang Wang,

Aimin Hao; [pdf] [arXiv]
[bibtex]
@InProceedings{Xi_2025_CVPR,
  author    = {Xi, Shufan and Liu, Zexian and Chang, Junlin and Wu, Hongyu and Wang, Xiaogang and Hao, Aimin},
  title     = {{3D} Dental Model Segmentation with Geometrical Boundary Preserving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10476--10485}
}
Neuro-3D: Towards 3D Visual Decoding from EEG Signals: Zhanqiang Guo,

Jiamin Wu,

Yonghao Song,

Jiahui Bu,

Weijian Mai,

Qihao Zheng,

Wanli Ouyang,

Chunfeng Song; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Zhanqiang and Wu, Jiamin and Song, Yonghao and Bu, Jiahui and Mai, Weijian and Zheng, Qihao and Ouyang, Wanli and Song, Chunfeng},
  title     = {{Neuro-3D}: Towards {3D} Visual Decoding from {EEG} Signals},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23870--23880}
}
FastVLM: Efficient Vision Encoding for Vision Language Models: Pavan Kumar Anasosalu Vasu,

Fartash Faghri,

Chun-Liang Li,

Cem Koc,

Nate True,

Albert Antony,

Gokula Santhanam,

James Gabriel,

Peter Grasch,

Oncel Tuzel,

Hadi Pouransari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vasu_2025_CVPR,
  author    = {Vasu, Pavan Kumar Anasosalu and Faghri, Fartash and Li, Chun-Liang and Koc, Cem and True, Nate and Antony, Albert and Santhanam, Gokula and Gabriel, James and Grasch, Peter and Tuzel, Oncel and Pouransari, Hadi},
  title     = {{FastVLM}: Efficient Vision Encoding for Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19769--19780}
}
VISTA3D: A Unified Segmentation Foundation Model For 3D Medical Imaging: Yufan He,

Pengfei Guo,

Yucheng Tang,

Andriy Myronenko,

Vishwesh Nath,

Ziyue Xu,

Dong Yang,

Can Zhao,

Benjamin Simon,

Mason Belue,

Stephanie Harmon,

Baris Turkbey,

Daguang Xu,

Wenqi Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Yufan and Guo, Pengfei and Tang, Yucheng and Myronenko, Andriy and Nath, Vishwesh and Xu, Ziyue and Yang, Dong and Zhao, Can and Simon, Benjamin and Belue, Mason and Harmon, Stephanie and Turkbey, Baris and Xu, Daguang and Li, Wenqi},
  title     = {{VISTA3D}: A Unified Segmentation Foundation Model For {3D} Medical Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20863--20873}
}
VideoGigaGAN: Towards Detail-rich Video Super-Resolution: Yiran Xu,

Taesung Park,

Richard Zhang,

Yang Zhou,

Eli Shechtman,

Feng Liu,

Jia-Bin Huang,

Difan Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Yiran and Park, Taesung and Zhang, Richard and Zhou, Yang and Shechtman, Eli and Liu, Feng and Huang, Jia-Bin and Liu, Difan},
  title     = {{VideoGigaGAN}: Towards Detail-rich Video Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2139--2149}
}
Probing the Mid-level Vision Capabilities of Self-Supervised Learning: Xuweiyi Chen,

Markus Marks,

Zezhou Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Xuweiyi and Marks, Markus and Cheng, Zezhou},
  title     = {Probing the Mid-level Vision Capabilities of Self-Supervised Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30095--30105}
}
S2D-LFE: Sparse-to-Dense Light Field Event Generation: Yutong Liu,

Wenming Weng,

Yueyi Zhang,

Zhiwei Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yutong and Weng, Wenming and Zhang, Yueyi and Xiong, Zhiwei},
  title     = {{S2D-LFE}: Sparse-to-Dense Light Field Event Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11207--11216}
}
The Art of Deception: Color Visual Illusions and Diffusion Models: Alexandra Gomez-Villa,

Kai Wang,

C. Alejandro Parraga,

Bartłomiej Twardowski,

Jesus Malo,

Javier Vazquez-Corral,

Joost van den Weijer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gomez-Villa_2025_CVPR,
  author    = {Gomez-Villa, Alexandra and Wang, Kai and Parraga, C. Alejandro and Twardowski, Bart{\l}omiej and Malo, Jesus and Vazquez-Corral, Javier and van den Weijer, Joost},
  title     = {The Art of Deception: Color Visual Illusions and Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18642--18652}
}
GLUS: Global-Local Reasoning Unified into A Single Large Language Model for Video Segmentation: Lang Lin,

Xueyang Yu,

Ziqi Pang,

Yu-Xiong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Lang and Yu, Xueyang and Pang, Ziqi and Wang, Yu-Xiong},
  title     = {{GLUS}: Global-Local Reasoning Unified into A Single Large Language Model for Video Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8658--8667}
}
Progressive Rendering Distillation: Adapting Stable Diffusion for Instant Text-to-Mesh Generation without 3D Data: Zhiyuan Ma,

Xinyue Liang,

Rongyuan Wu,

Xiangyu Zhu,

Zhen Lei,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Zhiyuan and Liang, Xinyue and Wu, Rongyuan and Zhu, Xiangyu and Lei, Zhen and Zhang, Lei},
  title     = {Progressive Rendering Distillation: Adapting {Stable Diffusion} for Instant Text-to-Mesh Generation without {3D} Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11036--11050}
}
Efficient Long Video Tokenization via Coordinate-based Patch Reconstruction: Huiwon Jang,

Sihyun Yu,

Jinwoo Shin,

Pieter Abbeel,

Younggyo Seo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2025_CVPR,
  author    = {Jang, Huiwon and Yu, Sihyun and Shin, Jinwoo and Abbeel, Pieter and Seo, Younggyo},
  title     = {Efficient Long Video Tokenization via Coordinate-based Patch Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22853--22863}
}
Derivative-Free Diffusion Manifold-Constrained Gradient for Unified XAI: Won Jun Kim,

Hyungjin Chung,

Jaemin Kim,

Sangmin Lee,

Byeongsu Sim,

Jong Chul Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Won Jun and Chung, Hyungjin and Kim, Jaemin and Lee, Sangmin and Sim, Byeongsu and Ye, Jong Chul},
  title     = {Derivative-Free Diffusion Manifold-Constrained Gradient for Unified {XAI}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23795--23805}
}
ZoomLDM: Latent Diffusion Model for Multi-scale Image Generation: Srikar Yellapragada,

Alexandros Graikos,

Kostas Triaridis,

Prateek Prasanna,

Rajarsi Gupta,

Joel Saltz,

Dimitris Samaras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yellapragada_2025_CVPR,
  author    = {Yellapragada, Srikar and Graikos, Alexandros and Triaridis, Kostas and Prasanna, Prateek and Gupta, Rajarsi and Saltz, Joel and Samaras, Dimitris},
  title     = {{ZoomLDM}: Latent Diffusion Model for Multi-scale Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23453--23463}
}
Do Computer Vision Foundation Models Learn the Low-level Characteristics of the Human Visual System?: Yancheng Cai,

Fei Yin,

Dounia Hammou,

Rafal Mantiuk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Yancheng and Yin, Fei and Hammou, Dounia and Mantiuk, Rafal},
  title     = {Do Computer Vision Foundation Models Learn the Low-level Characteristics of the Human Visual System?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20039--20048}
}
GaussianUDF: Inferring Unsigned Distance Functions through 3D Gaussian Splatting: Shujuan Li,

Yu-Shen Liu,

Zhizhong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Shujuan and Liu, Yu-Shen and Han, Zhizhong},
  title     = {{GaussianUDF}: Inferring Unsigned Distance Functions through {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27113--27123}
}
Towards RAW Object Detection in Diverse Conditions: Zhong-Yu Li,

Xin Jin,

Bo-Yuan Sun,

Chun-Le Guo,

Ming-Ming Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zhong-Yu and Jin, Xin and Sun, Bo-Yuan and Guo, Chun-Le and Cheng, Ming-Ming},
  title     = {Towards {RAW} Object Detection in Diverse Conditions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8859--8868}
}
FLAME: Frozen Large Language Models Enable Data-Efficient Language-Image Pre-training: Anjia Cao,

Xing Wei,

Zhiheng Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Anjia and Wei, Xing and Ma, Zhiheng},
  title     = {{FLAME}: Frozen Large Language Models Enable Data-Efficient Language-Image Pre-training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4080--4090}
}
CrossSDF: 3D Reconstruction of Thin Structures From Cross-Sections: Thomas Walker,

Salvatore Esposito,

Daniel Rebain,

Amir Vaxman,

Arno Onken,

Changjian Li,

Oisin Mac Aodha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Walker_2025_CVPR,
  author    = {Walker, Thomas and Esposito, Salvatore and Rebain, Daniel and Vaxman, Amir and Onken, Arno and Li, Changjian and Mac Aodha, Oisin},
  title     = {{CrossSDF}: {3D} Reconstruction of Thin Structures From Cross-Sections},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30928--30937}
}
DV-Matcher: Deformation-based Non-rigid Point Cloud Matching Guided by Pre-trained Visual Features: Zhangquan Chen,

Puhua Jiang,

Ruqi Huang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zhangquan and Jiang, Puhua and Huang, Ruqi},
  title     = {{DV-Matcher}: Deformation-based Non-rigid Point Cloud Matching Guided by Pre-trained Visual Features},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27264--27274}
}
Reasoning Mamba: Hypergraph-Guided Region Relation Calculating for Weakly Supervised Affordance Grounding: Yuxuan Wang,

Aming Wu,

Muli Yang,

Yukuan Min,

Yihang Zhu,

Cheng Deng; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuxuan and Wu, Aming and Yang, Muli and Min, Yukuan and Zhu, Yihang and Deng, Cheng},
  title     = {{Reasoning Mamba}: Hypergraph-Guided Region Relation Calculating for Weakly Supervised Affordance Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27618--27627}
}
Adapter Merging with Centroid Prototype Mapping for Scalable Class-Incremental Learning: Takuma Fukuda,

Hiroshi Kera,

Kazuhiko Kawamoto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fukuda_2025_CVPR,
  author    = {Fukuda, Takuma and Kera, Hiroshi and Kawamoto, Kazuhiko},
  title     = {Adapter Merging with Centroid Prototype Mapping for Scalable Class-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4884--4893}
}
OpenSDI: Spotting Diffusion-Generated Images in the Open World: Yabin Wang,

Zhiwu Huang,

Xiaopeng Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yabin and Huang, Zhiwu and Hong, Xiaopeng},
  title     = {{OpenSDI}: Spotting Diffusion-Generated Images in the Open World},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4291--4301}
}
Adaptive Part Learning for Fine-Grained Generalized Category Discovery: A Plug-and-Play Enhancement: Qiyuan Dai,

Hanzhuo Huang,

Yu Wu,

Sibei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2025_CVPR,
  author    = {Dai, Qiyuan and Huang, Hanzhuo and Wu, Yu and Yang, Sibei},
  title     = {Adaptive Part Learning for Fine-Grained Generalized Category Discovery: A Plug-and-Play Enhancement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25444--25453}
}
Online Task-Free Continual Learning via Dynamic Expansionable Memory Distribution: Fei Ye,

Adrian G. Bors; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Fei and Bors, Adrian G.},
  title     = {Online Task-Free Continual Learning via Dynamic Expansionable Memory Distribution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20512--20522}
}
Lux Post Facto: Learning Portrait Performance Relighting with Conditional Video Diffusion and a Hybrid Dataset: Yiqun Mei,

Mingming He,

Li Ma,

Julien Philip,

Wenqi Xian,

David M George,

Xueming Yu,

Gabriel Dedic,

Ahmet Levent Taşel,

Ning Yu,

Vishal M. Patel,

Paul Debevec; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mei_2025_CVPR,
  author    = {Mei, Yiqun and He, Mingming and Ma, Li and Philip, Julien and Xian, Wenqi and George, David M and Yu, Xueming and Dedic, Gabriel and Ta{\c{s}}el, Ahmet Levent and Yu, Ning and Patel, Vishal M. and Debevec, Paul},
  title     = {{Lux Post Facto}: Learning Portrait Performance Relighting with Conditional Video Diffusion and a Hybrid Dataset},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5510--5522}
}
DiG: Scalable and Efficient Diffusion Models with Gated Linear Attention: Lianghui Zhu,

Zilong Huang,

Bencheng Liao,

Jun Hao Liew,

Hanshu Yan,

Jiashi Feng,

Xinggang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Lianghui and Huang, Zilong and Liao, Bencheng and Liew, Jun Hao and Yan, Hanshu and Feng, Jiashi and Wang, Xinggang},
  title     = {{DiG}: Scalable and Efficient Diffusion Models with Gated Linear Attention},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7664--7674}
}
Monocular and Generalizable Gaussian Talking Head Animation: Shengjie Gong,

Haojie Li,

Jiapeng Tang,

Dongming Hu,

Shuangping Huang,

Hao Chen,

Tianshui Chen,

Zhuoman Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2025_CVPR,
  author    = {Gong, Shengjie and Li, Haojie and Tang, Jiapeng and Hu, Dongming and Huang, Shuangping and Chen, Hao and Chen, Tianshui and Liu, Zhuoman},
  title     = {Monocular and Generalizable {Gaussian} Talking Head Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5523--5534}
}
Rethinking Token Reduction with Parameter-Efficient Fine-Tuning in ViT for Pixel-Level Tasks: Cheng Lei,

Ao Li,

Hu Yao,

Ce Zhu,

Le Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Lei_2025_CVPR,
  author    = {Lei, Cheng and Li, Ao and Yao, Hu and Zhu, Ce and Zhang, Le},
  title     = {Rethinking Token Reduction with Parameter-Efficient Fine-Tuning in {ViT} for Pixel-Level Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14954--14964},
}
SVDC: Consistent Direct Time-of-Flight Video Depth Completion with Frequency Selective Fusion: Xuan Zhu,

Jijun Xiang,

Xianqi Wang,

Longliang Liu,

Yu Wang,

Hong Zhang,

Fei Guo,

Xin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Xuan and Xiang, Jijun and Wang, Xianqi and Liu, Longliang and Wang, Yu and Zhang, Hong and Guo, Fei and Yang, Xin},
  title     = {{SVDC}: Consistent Direct Time-of-Flight Video Depth Completion with Frequency Selective Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16619--16628},
}
Locally Orderless Images for Optimization in Differentiable Rendering: Ishit Mehta,

Manmohan Chandraker,

Ravi Ramamoorthi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mehta_2025_CVPR,
  author    = {Mehta, Ishit and Chandraker, Manmohan and Ramamoorthi, Ravi},
  title     = {Locally Orderless Images for Optimization in Differentiable Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5763--5772},
}
Plug-and-Play Interpretable Responsible Text-to-Image Generation via Dual-Space Multi-facet Concept Control: Basim Azam,

Naveed Akhtar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Azam_2025_CVPR,
  author    = {Azam, Basim and Akhtar, Naveed},
  title     = {Plug-and-Play Interpretable Responsible Text-to-Image Generation via Dual-Space Multi-facet Concept Control},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2976--2985},
}
Rethinking Training for De-biasing Text-to-Image Generation: Unlocking the Potential of Stable Diffusion: Eunji Kim,

Siwon Kim,

Minjun Park,

Rahim Entezari,

Sungroh Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Eunji and Kim, Siwon and Park, Minjun and Entezari, Rahim and Yoon, Sungroh},
  title     = {Rethinking Training for De-biasing Text-to-Image Generation: Unlocking the Potential of {Stable Diffusion}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13361--13370},
}
FLAIR: VLM with Fine-grained Language-informed Image Representations: Rui Xiao,

Sanghwan Kim,

Mariana-Iuliana Georgescu,

Zeynep Akata,

Stephan Alaniz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Rui and Kim, Sanghwan and Georgescu, Mariana-Iuliana and Akata, Zeynep and Alaniz, Stephan},
  title     = {{FLAIR}: {VLM} with Fine-grained Language-informed Image Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24884--24894},
}
GG-SSMs: Graph-Generating State Space Models: Nikola Zubic,

Davide Scaramuzza; [pdf] [supp]
[bibtex]
@InProceedings{Zubic_2025_CVPR,
  author    = {Zubic, Nikola and Scaramuzza, Davide},
  title     = {{GG-SSMs}: Graph-Generating State Space Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28863--28873},
}
Instant3dit: Multiview Inpainting for Fast Editing of 3D Objects: Amir Barda,

Matheus Gadelha,

Vladimir G. Kim,

Noam Aigerman,

Amit H. Bermano,

Thibault Groueix; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Barda_2025_CVPR,
  author    = {Barda, Amir and Gadelha, Matheus and Kim, Vladimir G. and Aigerman, Noam and Bermano, Amit H. and Groueix, Thibault},
  title     = {{Instant3dit}: Multiview Inpainting for Fast Editing of {3D} Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16273--16282},
}
STDD: Spatio-Temporal Dual Diffusion for Video Generation: Shuaizhen Yao,

Xiaoya Zhang,

Xin Liu,

Mengyi Liu,

Zhen Cui; [pdf] [supp]
[bibtex]
@InProceedings{Yao_2025_CVPR,
  author    = {Yao, Shuaizhen and Zhang, Xiaoya and Liu, Xin and Liu, Mengyi and Cui, Zhen},
  title     = {{STDD}: Spatio-Temporal Dual Diffusion for Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12575--12584},
}
Implicit Correspondence Learning for Image-to-Point Cloud Registration: Xinjun Li,

Wenfei Yang,

Jiacheng Deng,

Zhixin Cheng,

Xu Zhou,

Tianzhu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xinjun and Yang, Wenfei and Deng, Jiacheng and Cheng, Zhixin and Zhou, Xu and Zhang, Tianzhu},
  title     = {Implicit Correspondence Learning for Image-to-Point Cloud Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16922--16931},
}
Continuous Adverse Weather Removal via Degradation-Aware Distillation: Xin Lu,

Jie Xiao,

Yurui Zhu,

Xueyang Fu; [pdf]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Xin and Xiao, Jie and Zhu, Yurui and Fu, Xueyang},
  title     = {Continuous Adverse Weather Removal via Degradation-Aware Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28113--28123},
}
Fine-Grained Erasure in Text-to-Image Diffusion-based Foundation Models: Kartik Thakral,

Tamar Glaser,

Tal Hassner,

Mayank Vatsa,

Richa Singh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thakral_2025_CVPR,
  author    = {Thakral, Kartik and Glaser, Tamar and Hassner, Tal and Vatsa, Mayank and Singh, Richa},
  title     = {Fine-Grained Erasure in Text-to-Image Diffusion-based Foundation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9121--9130},
}
ILIAS: Instance-Level Image retrieval At Scale: Giorgos Kordopatis-Zilos,

Vladan Stojnić,

Anna Manko,

Pavel Suma,

Nikolaos-Antonios Ypsilantis,

Nikos Efthymiadis,

Zakaria Laskar,

Jiri Matas,

Ondrej Chum,

Giorgos Tolias; [pdf] [supp]
[bibtex]
@InProceedings{Kordopatis-Zilos_2025_CVPR,
  author    = {Kordopatis-Zilos, Giorgos and Stojni{\'c}, Vladan and Manko, Anna and Suma, Pavel and Ypsilantis, Nikolaos-Antonios and Efthymiadis, Nikos and Laskar, Zakaria and Matas, Jiri and Chum, Ondrej and Tolias, Giorgos},
  title     = {{ILIAS}: Instance-Level Image retrieval At Scale},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14777--14787},
}
Exploiting Temporal State Space Sharing for Video Semantic Segmentation: Syed Ariff Syed Hesham,

Yun Liu,

Guolei Sun,

Henghui Ding,

Jing Yang,

Ender Konukoglu,

Xue Geng,

Xudong Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hesham_2025_CVPR,
  author    = {Hesham, Syed Ariff Syed and Liu, Yun and Sun, Guolei and Ding, Henghui and Yang, Jing and Konukoglu, Ender and Geng, Xue and Jiang, Xudong},
  title     = {Exploiting Temporal State Space Sharing for Video Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24211--24221},
}
DeRS: Towards Extremely Efficient Upcycled Mixture-of-Experts Models: Yongqi Huang,

Peng Ye,

Chenyu Huang,

Jianjian Cao,

Lin Zhang,

Baopu Li,

Gang Yu,

Tao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Yongqi and Ye, Peng and Huang, Chenyu and Cao, Jianjian and Zhang, Lin and Li, Baopu and Yu, Gang and Chen, Tao},
  title     = {{DeRS}: Towards Extremely Efficient Upcycled Mixture-of-Experts Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10056--10066},
}
GeoDepth: From Point-to-Depth to Plane-to-Depth Modeling for Self-Supervised Monocular Depth Estimation: Haifeng Wu,

Shuhang Gu,

Lixin Duan,

Wen Li; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Haifeng and Gu, Shuhang and Duan, Lixin and Li, Wen},
  title     = {{GeoDepth}: From Point-to-Depth to Plane-to-Depth Modeling for Self-Supervised Monocular Depth Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11525--11535},
}
SSHNet: Unsupervised Cross-modal Homography Estimation via Problem Reformulation and Split Optimization: Junchen Yu,

Si-Yuan Cao,

Runmin Zhang,

Chenghao Zhang,

Zhu Yu,

Shujie Chen,

Bailin Yang,

Hui-Liang Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Junchen and Cao, Si-Yuan and Zhang, Runmin and Zhang, Chenghao and Yu, Zhu and Chen, Shujie and Yang, Bailin and Shen, Hui-Liang},
  title     = {{SSHNet}: Unsupervised Cross-modal Homography Estimation via Problem Reformulation and Split Optimization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16685--16694},
}
High-fidelity 3D Object Generation from Single Image with RGBN-Volume Gaussian Reconstruction Model: Yiyang Shen,

Kun Zhou,

He Wang,

Yin Yang,

Tianjia Shao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Yiyang and Zhou, Kun and Wang, He and Yang, Yin and Shao, Tianjia},
  title     = {High-fidelity {3D} Object Generation from Single Image with {RGBN}-Volume {Gaussian} Reconstruction Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21558--21569},
}
Steepest Descent Density Control for Compact 3D Gaussian Splatting: Peihao Wang,

Yuehao Wang,

Dilin Wang,

Sreyas Mohan,

Zhiwen Fan,

Lemeng Wu,

Ruisi Cai,

Yu-Ying Yeh,

Zhangyang Wang,

Qiang Liu,

Rakesh Ranjan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Peihao and Wang, Yuehao and Wang, Dilin and Mohan, Sreyas and Fan, Zhiwen and Wu, Lemeng and Cai, Ruisi and Yeh, Yu-Ying and Wang, Zhangyang and Liu, Qiang and Ranjan, Rakesh},
  title     = {Steepest Descent Density Control for Compact {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26663--26672},
}
Optimal Transport-Guided Source-Free Adaptation for Face Anti-Spoofing: Zhuowei Li,

Tianchen Zhao,

Xiang Xu,

Zheng Zhang,

Zhihua Li,

Xuanbai Chen,

Qin Zhang,

Alessandro Bergamo,

Anil K. Jain,

Yifan Xing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zhuowei and Zhao, Tianchen and Xu, Xiang and Zhang, Zheng and Li, Zhihua and Chen, Xuanbai and Zhang, Qin and Bergamo, Alessandro and Jain, Anil K. and Xing, Yifan},
  title     = {Optimal Transport-Guided Source-Free Adaptation for Face Anti-Spoofing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24351--24363},
}
USP-Gaussian: Unifying Spike-based Image Reconstruction, Pose Correction and Gaussian Splatting: Kang Chen,

Jiyuan Zhang,

Zecheng Hao,

Yajing Zheng,

Tiejun Huang,

Zhaofei Yu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Kang and Zhang, Jiyuan and Hao, Zecheng and Zheng, Yajing and Huang, Tiejun and Yu, Zhaofei},
  title     = {{USP-Gaussian}: Unifying Spike-based Image Reconstruction, Pose Correction and {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16609--16618},
}
Robust 3D Shape Reconstruction in Zero-Shot from a Single Image in the Wild: Junhyeong Cho,

Kim Youwang,

Hunmin Yang,

Tae-Hyun Oh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cho_2025_CVPR,
  author    = {Cho, Junhyeong and Youwang, Kim and Yang, Hunmin and Oh, Tae-Hyun},
  title     = {Robust {3D} Shape Reconstruction in Zero-Shot from a Single Image in the Wild},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22786--22798},
}
BOE-ViT: Boosting Orientation Estimation with Equivariance in Self-Supervised 3D Subtomogram Alignment: Runmin Jiang,

Jackson Daggett,

Shriya Pingulkar,

Yizhou Zhao,

Priyanshu Dhingra,

Daniel Brown,

Qifeng Wu,

Xiangrui Zeng,

Xingjian Li,

Min Xu; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Runmin and Daggett, Jackson and Pingulkar, Shriya and Zhao, Yizhou and Dhingra, Priyanshu and Brown, Daniel and Wu, Qifeng and Zeng, Xiangrui and Li, Xingjian and Xu, Min},
  title     = {{BOE-ViT}: Boosting Orientation Estimation with Equivariance in Self-Supervised {3D} Subtomogram Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29352--29362},
}
Holmes-VAU: Towards Long-term Video Anomaly Understanding at Any Granularity: Huaxin Zhang,

Xiaohao Xu,

Xiang Wang,

Jialong Zuo,

Xiaonan Huang,

Changxin Gao,

Shanjun Zhang,

Li Yu,

Nong Sang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Huaxin and Xu, Xiaohao and Wang, Xiang and Zuo, Jialong and Huang, Xiaonan and Gao, Changxin and Zhang, Shanjun and Yu, Li and Sang, Nong},
  title     = {{Holmes-VAU}: Towards Long-term Video Anomaly Understanding at Any Granularity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13843--13853},
}
Adventurer: Optimizing Vision Mamba Architecture Designs for Efficiency: Feng Wang,

Timing Yang,

Yaodong Yu,

Sucheng Ren,

Guoyizhe Wei,

Angtian Wang,

Wei Shao,

Yuyin Zhou,

Alan Yuille,

Cihang Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Feng and Yang, Timing and Yu, Yaodong and Ren, Sucheng and Wei, Guoyizhe and Wang, Angtian and Shao, Wei and Zhou, Yuyin and Yuille, Alan and Xie, Cihang},
  title     = {Adventurer: Optimizing Vision {Mamba} Architecture Designs for Efficiency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30157--30166},
}
Beyond Local Sharpness: Communication-Efficient Global Sharpness-aware Minimization for Federated Learning: Debora Caldarola,

Pietro Cagnasso,

Barbara Caputo,

Marco Ciccone; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Caldarola_2025_CVPR,
  author    = {Caldarola, Debora and Cagnasso, Pietro and Caputo, Barbara and Ciccone, Marco},
  title     = {Beyond Local Sharpness: Communication-Efficient Global Sharpness-aware Minimization for Federated Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25187--25197},
}
ALIEN: Implicit Neural Representations for Human Motion Prediction under Arbitrary Latency: Dong Wei,

Xiaoning Sun,

Xizhan Gao,

Shengxiang Hu,

Huaijiang Sun; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Dong and Sun, Xiaoning and Gao, Xizhan and Hu, Shengxiang and Sun, Huaijiang},
  title     = {{ALIEN}: Implicit Neural Representations for Human Motion Prediction under Arbitrary Latency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1861--1870},
}
Parameterized Blur Kernel Prior Learning for Local Motion Deblurring: Zhenxuan Fang,

Fangfang Wu,

Tao Huang,

Le Dong,

Weisheng Dong,

Xin Li,

Guangming Shi; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Zhenxuan and Wu, Fangfang and Huang, Tao and Dong, Le and Dong, Weisheng and Li, Xin and Shi, Guangming},
  title     = {Parameterized Blur Kernel Prior Learning for Local Motion Deblurring},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23006--23015},
}
QuartDepth: Post-Training Quantization for Real-Time Depth Estimation on the Edge: Xuan Shen,

Weize Ma,

Jing Liu,

Changdi Yang,

Rui Ding,

Quanyi Wang,

Henghui Ding,

Wei Niu,

Yanzhi Wang,

Pu Zhao,

Jun Lin,

Jiuxiang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Xuan and Ma, Weize and Liu, Jing and Yang, Changdi and Ding, Rui and Wang, Quanyi and Ding, Henghui and Niu, Wei and Wang, Yanzhi and Zhao, Pu and Lin, Jun and Gu, Jiuxiang},
  title     = {{QuartDepth}: Post-Training Quantization for Real-Time Depth Estimation on the Edge},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11448--11460},
}
ReWind: Understanding Long Videos with Instructed Learnable Memory: Anxhelo Diko,

Tinghuai Wang,

Wassim Swaileh,

Shiyan Sun,

Ioannis Patras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Diko_2025_CVPR,
  author    = {Diko, Anxhelo and Wang, Tinghuai and Swaileh, Wassim and Sun, Shiyan and Patras, Ioannis},
  title     = {{ReWind}: Understanding Long Videos with Instructed Learnable Memory},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13734--13743},
}
Sufficient Invariant Learning for Distribution Shift: Taero Kim,

Subeen Park,

Sungjun Lim,

Yonghan Jung,

Krikamol Muandet,

Kyungwoo Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Taero and Park, Subeen and Lim, Sungjun and Jung, Yonghan and Muandet, Krikamol and Song, Kyungwoo},
  title     = {Sufficient Invariant Learning for Distribution Shift},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4958--4967},
}
DirectTriGS: Triplane-based Gaussian Splatting Field Representation for 3D Generation: Xiaoliang Ju,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ju_2025_CVPR,
  author    = {Ju, Xiaoliang and Li, Hongsheng},
  title     = {{DirectTriGS}: Triplane-based {Gaussian} Splatting Field Representation for {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16229--16239},
}
Domain Generalization in CLIP via Learning with Diverse Text Prompts: Changsong Wen,

Zelin Peng,

Yu Huang,

Xiaokang Yang,

Wei Shen; [pdf]
[bibtex]
@InProceedings{Wen_2025_CVPR,
  author    = {Wen, Changsong and Peng, Zelin and Huang, Yu and Yang, Xiaokang and Shen, Wei},
  title     = {Domain Generalization in {CLIP} via Learning with Diverse Text Prompts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9559--9569},
}
Scene4U: Hierarchical Layered 3D Scene Reconstruction from Single Panoramic Image for Your Immerse Exploration: Zilong Huang,

Jun He,

Junyan Ye,

Lihan Jiang,

Weijia Li,

Yiping Chen,

Ting Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zilong and He, Jun and Ye, Junyan and Jiang, Lihan and Li, Weijia and Chen, Yiping and Han, Ting},
  title     = {{Scene4U}: Hierarchical Layered {3D} Scene Reconstruction from Single Panoramic Image for Your Immerse Exploration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26723--26733},
}
Make-It-Animatable: An Efficient Framework for Authoring Animation-Ready 3D Characters: Zhiyang Guo,

Jinxu Xiang,

Kai Ma,

Wengang Zhou,

Houqiang Li,

Ran Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Zhiyang and Xiang, Jinxu and Ma, Kai and Zhou, Wengang and Li, Houqiang and Zhang, Ran},
  title     = {{Make-It-Animatable}: An Efficient Framework for Authoring Animation-Ready {3D} Characters},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10783--10792},
}
IterIS: Iterative Inference-Solving Alignment for LoRA Merging: Hongxu Chen,

Zhen Wang,

Runshi Li,

Bowei Zhu,

Long Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Hongxu and Wang, Zhen and Li, Runshi and Zhu, Bowei and Chen, Long},
  title     = {{IterIS}: Iterative Inference-Solving Alignment for {LoRA} Merging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4829--4838},
}
ACAttack: Adaptive Cross Attacking RGB-T Tracker via Multi-Modal Response Decoupling: Xinyu Xiang,

Qinglong Yan,

Hao Zhang,

Jiayi Ma; [pdf]
[bibtex]
@InProceedings{Xiang_2025_CVPR,
  author    = {Xiang, Xinyu and Yan, Qinglong and Zhang, Hao and Ma, Jiayi},
  title     = {{ACAttack}: Adaptive Cross Attacking {RGB-T} Tracker via Multi-Modal Response Decoupling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22099--22108},
}
DeCafNet: Delegate and Conquer for Efficient Temporal Grounding in Long Videos: Zijia Lu,

A S M Iftekhar,

Gaurav Mittal,

Tianjian Meng,

Xiawei Wang,

Cheng Zhao,

Rohith Kukkala,

Ehsan Elhamifar,

Mei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Zijia and Iftekhar, A S M and Mittal, Gaurav and Meng, Tianjian and Wang, Xiawei and Zhao, Cheng and Kukkala, Rohith and Elhamifar, Ehsan and Chen, Mei},
  title     = {{DeCafNet}: Delegate and Conquer for Efficient Temporal Grounding in Long Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24066--24076},
}
Efficient ANN-Guided Distillation: Aligning Rate-based Features of Spiking Neural Networks through Hybrid Block-wise Replacement: Shu Yang,

Chengting Yu,

Lei Liu,

Hanzhi Ma,

Aili Wang,

Erping Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Shu and Yu, Chengting and Liu, Lei and Ma, Hanzhi and Wang, Aili and Li, Erping},
  title     = {Efficient {ANN}-Guided Distillation: Aligning Rate-based Features of Spiking Neural Networks through Hybrid Block-wise Replacement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10025--10035},
}
PrEditor3D: Fast and Precise 3D Shape Editing: Ziya Erkoç,

Can Gümeli,

Chaoyang Wang,

Matthias Nießner,

Angela Dai,

Peter Wonka,

Hsin-Ying Lee,

Peiye Zhuang; [pdf] [supp]
[bibtex]
@InProceedings{Erkoc_2025_CVPR,
  author    = {Erko{\c{c}}, Ziya and G{\"u}meli, Can and Wang, Chaoyang and Nie{\ss}ner, Matthias and Dai, Angela and Wonka, Peter and Lee, Hsin-Ying and Zhuang, Peiye},
  title     = {{PrEditor3D}: Fast and Precise {3D} Shape Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {640--649},
}
Subspace Constraint and Contribution Estimation for Heterogeneous Federated Learning: Xiangtao Zhang,

Sheng Li,

Ao Li,

Yipeng Liu,

Fan Zhang,

Ce Zhu,

Le Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xiangtao and Li, Sheng and Li, Ao and Liu, Yipeng and Zhang, Fan and Zhu, Ce and Zhang, Le},
  title     = {Subspace Constraint and Contribution Estimation for Heterogeneous Federated Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20632--20642},
}
HoGS: Unified Near and Far Object Reconstruction via Homogeneous Gaussian Splatting: Xinpeng Liu,

Zeyi Huang,

Fumio Okura,

Yasuyuki Matsushita; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xinpeng and Huang, Zeyi and Okura, Fumio and Matsushita, Yasuyuki},
  title     = {{HoGS}: Unified Near and Far Object Reconstruction via Homogeneous {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26714--26722},
}
SmartEraser: Remove Anything from Images using Masked-Region Guidance: Longtao Jiang,

Zhendong Wang,

Jianmin Bao,

Wengang Zhou,

Dongdong Chen,

Lei Shi,

Dong Chen,

Houqiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Longtao and Wang, Zhendong and Bao, Jianmin and Zhou, Wengang and Chen, Dongdong and Shi, Lei and Chen, Dong and Li, Houqiang},
  title     = {{SmartEraser}: Remove Anything from Images using Masked-Region Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24452--24462},
}
ComRoPE: Scalable and Robust Rotary Position Embedding Parameterized by Trainable Commuting Angle Matrices: Hao Yu,

Tangyu Jiang,

Shuning Jia,

Shannan Yan,

Shunning Liu,

Haolong Qian,

Guanghao Li,

Shuting Dong,

Chun Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Hao and Jiang, Tangyu and Jia, Shuning and Yan, Shannan and Liu, Shunning and Qian, Haolong and Li, Guanghao and Dong, Shuting and Yuan, Chun},
  title     = {{ComRoPE}: Scalable and Robust Rotary Position Embedding Parameterized by Trainable Commuting Angle Matrices},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4508--4517},
}
Sample- and Parameter-Efficient Auto-Regressive Image Models: Elad Amrani,

Leonid Karlinsky,

Alex Bronstein; [pdf] [supp]
[bibtex]
@InProceedings{Amrani_2025_CVPR,
  author    = {Amrani, Elad and Karlinsky, Leonid and Bronstein, Alex},
  title     = {Sample- and Parameter-Efficient Auto-Regressive Image Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30127--30136},
}
Robust Audio-Visual Segmentation via Audio-Guided Visual Convergent Alignment: Chen Liu,

Peike Li,

Liying Yang,

Dadong Wang,

Lincheng Li,

Xin Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Chen and Li, Peike and Yang, Liying and Wang, Dadong and Li, Lincheng and Yu, Xin},
  title     = {Robust Audio-Visual Segmentation via Audio-Guided Visual Convergent Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28922--28931},
}
LOCORE: Image Re-ranking with Long-Context Sequence Modeling: Zilin Xiao,

Pavel Suma,

Ayush Sachdeva,

Hao-Jen Wang,

Giorgos Kordopatis-Zilos,

Giorgos Tolias,

Vicente Ordonez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Zilin and Suma, Pavel and Sachdeva, Ayush and Wang, Hao-Jen and Kordopatis-Zilos, Giorgos and Tolias, Giorgos and Ordonez, Vicente},
  title     = {{LOCORE}: Image Re-ranking with Long-Context Sequence Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9580--9590},
}
NeRFPrior: Learning Neural Radiance Field as a Prior for Indoor Scene Reconstruction: Wenyuan Zhang,

Emily Yue-ting Jia,

Junsheng Zhou,

Baorui Ma,

Kanle Shi,

Yu-Shen Liu,

Zhizhong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Wenyuan and Jia, Emily Yue-ting and Zhou, Junsheng and Ma, Baorui and Shi, Kanle and Liu, Yu-Shen and Han, Zhizhong},
  title     = {{NeRFPrior}: Learning Neural Radiance Field as a Prior for Indoor Scene Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11317--11327},
}
BiLoRA: Almost-Orthogonal Parameter Spaces for Continual Learning: Hao Zhu,

Yifei Zhang,

Junhao Dong,

Piotr Koniusz; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Hao and Zhang, Yifei and Dong, Junhao and Koniusz, Piotr},
  title     = {{BiLoRA}: Almost-Orthogonal Parameter Spaces for Continual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25613--25622},
}
Vid2Sim: Generalizable, Video-based Reconstruction of Appearance, Geometry and Physics for Mesh-free Simulation: Chuhao Chen,

Zhiyang Dou,

Chen Wang,

Yiming Huang,

Anjun Chen,

Qiao Feng,

Jiatao Gu,

Lingjie Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Chuhao and Dou, Zhiyang and Wang, Chen and Huang, Yiming and Chen, Anjun and Feng, Qiao and Gu, Jiatao and Liu, Lingjie},
  title     = {{Vid2Sim}: Generalizable, Video-based Reconstruction of Appearance, Geometry and Physics for Mesh-free Simulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26545--26555},
}
SceneTAP: Scene-Coherent Typographic Adversarial Planner against Vision-Language Models in Real-World Environments: Yue Cao,

Yun Xing,

Jie Zhang,

Di Lin,

Tianwei Zhang,

Ivor Tsang,

Yang Liu,

Qing Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Yue and Xing, Yun and Zhang, Jie and Lin, Di and Zhang, Tianwei and Tsang, Ivor and Liu, Yang and Guo, Qing},
  title     = {{SceneTAP}: Scene-Coherent Typographic Adversarial Planner against Vision-Language Models in Real-World Environments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25050--25059},
}
Collaborative Decoding Makes Visual Auto-Regressive Modeling Efficient: Zigeng Chen,

Xinyin Ma,

Gongfan Fang,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zigeng and Ma, Xinyin and Fang, Gongfan and Wang, Xinchao},
  title     = {Collaborative Decoding Makes Visual Auto-Regressive Modeling Efficient},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23334--23344},
}
AerialMegaDepth: Learning Aerial-Ground Reconstruction and View Synthesis: Khiem Vuong,

Anurag Ghosh,

Deva Ramanan,

Srinivasa Narasimhan,

Shubham Tulsiani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vuong_2025_CVPR,
  author    = {Vuong, Khiem and Ghosh, Anurag and Ramanan, Deva and Narasimhan, Srinivasa and Tulsiani, Shubham},
  title     = {{AerialMegaDepth}: Learning Aerial-Ground Reconstruction and View Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21674--21684},
}
Towards Training-free Anomaly Detection with Vision and Language Foundation Models: Jinjin Zhang,

Guodong Wang,

Yizhou Jin,

Di Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jinjin and Wang, Guodong and Jin, Yizhou and Huang, Di},
  title     = {Towards Training-free Anomaly Detection with Vision and Language Foundation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15204--15213},
}
LiVOS: Light Video Object Segmentation with Gated Linear Matching: Qin Liu,

Jianfeng Wang,

Zhengyuan Yang,

Linjie Li,

Kevin Lin,

Marc Niethammer,

Lijuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Qin and Wang, Jianfeng and Yang, Zhengyuan and Li, Linjie and Lin, Kevin and Niethammer, Marc and Wang, Lijuan},
  title     = {{LiVOS}: Light Video Object Segmentation with Gated Linear Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8668--8678},
}
Dynamic Content Prediction with Motion-aware Priors for Blind Face Video Restoration: Lianxin Xie,

Bingbing Zheng,

Si Wu,

Hau San Wong; [pdf]
[bibtex]
@InProceedings{Xie_2025_CVPR, author = {Xie, Lianxin and Zheng, Bingbing and Wu, Si and Wong, Hau San}, title = {Dynamic Content Prediction with Motion-aware Priors for Blind Face Video Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17821--17830} }
Polarized Color Screen Matting: Kenji Enomoto,

Scott Cohen,

Brian Price,

TJ Rhodes; [pdf] [supp]
[bibtex]
@InProceedings{Enomoto_2025_CVPR, author = {Enomoto, Kenji and Cohen, Scott and Price, Brian and Rhodes, TJ}, title = {Polarized Color Screen Matting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {391--399} }
Visual Representation Learning through Causal Intervention for Controllable Image Editing: Shanshan Huang,

Haoxuan Li,

Chunyuan Zheng,

Lei Wang,

Guorui Liao,

Zhili Gong,

Huayi Yang,

Li Liu; [pdf]
[bibtex]
@InProceedings{Huang_2025_CVPR, author = {Huang, Shanshan and Li, Haoxuan and Zheng, Chunyuan and Wang, Lei and Liao, Guorui and Gong, Zhili and Yang, Huayi and Liu, Li}, title = {Visual Representation Learning through Causal Intervention for Controllable Image Editing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23484--23493} }
Exploring the Deep Fusion of Large Language Models and Diffusion Transformers for Text-to-Image Synthesis: Bingda Tang,

Boyang Zheng,

Sayak Paul,

Saining Xie; [pdf] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR, author = {Tang, Bingda and Zheng, Boyang and Paul, Sayak and Xie, Saining}, title = {Exploring the Deep Fusion of Large Language Models and Diffusion Transformers for Text-to-Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28586--28595} }
A Comprehensive Study of Decoder-Only LLMs for Text-to-Image Generation: Andrew Z. Wang,

Songwei Ge,

Tero Karras,

Ming-Yu Liu,

Yogesh Balaji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Andrew Z. and Ge, Songwei and Karras, Tero and Liu, Ming-Yu and Balaji, Yogesh}, title = {A Comprehensive Study of Decoder-Only {LLMs} for Text-to-Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28575--28585} }
Exploring Sparse MoE in GANs for Text-conditioned Image Synthesis: Jiapeng Zhu,

Ceyuan Yang,

Kecheng Zheng,

Yinghao Xu,

Zifan Shi,

Yifei Zhang,

Qifeng Chen,

Yujun Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR, author = {Zhu, Jiapeng and Yang, Ceyuan and Zheng, Kecheng and Xu, Yinghao and Shi, Zifan and Zhang, Yifei and Chen, Qifeng and Shen, Yujun}, title = {Exploring Sparse {MoE} in {GANs} for Text-conditioned Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18411--18423} }
Deformable Radial Kernel Splatting: Yi-Hua Huang,

Ming-Xian Lin,

Yang-Tian Sun,

Ziyi Yang,

Xiaoyang Lyu,

Yan-Pei Cao,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR, author = {Huang, Yi-Hua and Lin, Ming-Xian and Sun, Yang-Tian and Yang, Ziyi and Lyu, Xiaoyang and Cao, Yan-Pei and Qi, Xiaojuan}, title = {Deformable Radial Kernel Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21513--21523} }
GOAL: Global-local Object Alignment Learning: Hyungyu Choi,

Young Kyun Jang,

Chanho Eom; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2025_CVPR, author = {Choi, Hyungyu and Jang, Young Kyun and Eom, Chanho}, title = {{GOAL}: Global-local Object Alignment Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4070--4079} }
Bayesian Prompt Flow Learning for Zero-Shot Anomaly Detection: Zhen Qu,

Xian Tao,

Xinyi Gong,

ShiChen Qu,

Qiyu Chen,

Zhengtao Zhang,

Xingang Wang,

Guiguang Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_CVPR, author = {Qu, Zhen and Tao, Xian and Gong, Xinyi and Qu, ShiChen and Chen, Qiyu and Zhang, Zhengtao and Wang, Xingang and Ding, Guiguang}, title = {{Bayesian} Prompt Flow Learning for Zero-Shot Anomaly Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30398--30408} }
Post-pre-training for Modality Alignment in Vision-Language Foundation Models: Shin'ya Yamaguchi,

Dewei Feng,

Sekitoshi Kanai,

Kazuki Adachi,

Daiki Chijiwa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yamaguchi_2025_CVPR, author = {Yamaguchi, Shin'ya and Feng, Dewei and Kanai, Sekitoshi and Adachi, Kazuki and Chijiwa, Daiki}, title = {Post-pre-training for Modality Alignment in Vision-Language Foundation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4256--4266} }
Efficient Event-Based Object Detection: A Hybrid Neural Network with Spatial and Temporal Attention: Soikat Hasan Ahmed,

Jan Finkbeiner,

Emre Neftci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ahmed_2025_CVPR, author = {Ahmed, Soikat Hasan and Finkbeiner, Jan and Neftci, Emre}, title = {Efficient Event-Based Object Detection: A Hybrid Neural Network with Spatial and Temporal Attention}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13970--13979} }
SynthLight: Portrait Relighting with Diffusion Model by Learning to Re-render Synthetic Faces: Sumit Chaturvedi,

Mengwei Ren,

Yannick Hold-Geoffroy,

Jingyuan Liu,

Julie Dorsey,

Zhixin Shu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chaturvedi_2025_CVPR, author = {Chaturvedi, Sumit and Ren, Mengwei and Hold-Geoffroy, Yannick and Liu, Jingyuan and Dorsey, Julie and Shu, Zhixin}, title = {{SynthLight}: Portrait Relighting with Diffusion Model by Learning to Re-render Synthetic Faces}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {369--379} }
Pseudo Visible Feature Fine-Grained Fusion for Thermal Object Detection: Ting Li,

Mao Ye,

Tianwen Wu,

Nianxin Li,

Shuaifeng Li,

Song Tang,

Luping Ji; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Ting and Ye, Mao and Wu, Tianwen and Li, Nianxin and Li, Shuaifeng and Tang, Song and Ji, Luping}, title = {Pseudo Visible Feature Fine-Grained Fusion for Thermal Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6710--6719} }
HUNet: Homotopy Unfolding Network for Image Compressive Sensing: Feiyang Shen,

Hongping Gan; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_CVPR, author = {Shen, Feiyang and Gan, Hongping}, title = {{HUNet}: Homotopy Unfolding Network for Image Compressive Sensing}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12799--12808} }
HalLoc: Token-level Localization of Hallucinations for Vision Language Models: Eunkyu Park,

Minyeong Kim,

Gunhee Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR, author = {Park, Eunkyu and Kim, Minyeong and Kim, Gunhee}, title = {{HalLoc}: Token-level Localization of Hallucinations for Vision Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29893--29903} }
DiffPortrait360: Consistent Portrait Diffusion for 360 View Synthesis: Yuming Gu,

Phong Tran,

Yujian Zheng,

Hongyi Xu,

Heyuan Li,

Adilbek Karmanov,

Hao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2025_CVPR, author = {Gu, Yuming and Tran, Phong and Zheng, Yujian and Xu, Hongyi and Li, Heyuan and Karmanov, Adilbek and Li, Hao}, title = {{DiffPortrait360}: Consistent Portrait Diffusion for 360 View Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26263--26273} }
SURGEON: Memory-Adaptive Fully Test-Time Adaptation via Dynamic Activation Sparsity: Ke Ma,

Jiaqi Tang,

Bin Guo,

Fan Dang,

Sicong Liu,

Zhui Zhu,

Lei Wu,

Cheng Fang,

Ying-Cong Chen,

Zhiwen Yu,

Yunhao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR, author = {Ma, Ke and Tang, Jiaqi and Guo, Bin and Dang, Fan and Liu, Sicong and Zhu, Zhui and Wu, Lei and Fang, Cheng and Chen, Ying-Cong and Yu, Zhiwen and Liu, Yunhao}, title = {{SURGEON}: Memory-Adaptive Fully Test-Time Adaptation via Dynamic Activation Sparsity}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30514--30523} }
NVILA: Efficient Frontier Visual Language Models: Zhijian Liu,

Ligeng Zhu,

Baifeng Shi,

Zhuoyang Zhang,

Yuming Lou,

Shang Yang,

Haocheng Xi,

Shiyi Cao,

Yuxian Gu,

Dacheng Li,

Xiuyu Li,

Haotian Tang,

Yunhao Fang,

Yukang Chen,

Cheng-Yu Hsieh,

De-An Huang,

An-Chieh Cheng,

Jinyi Hu,

Sifei Liu,

Ranjay Krishna,

Pavlo Molchanov,

Jan Kautz,

Hongxu Yin,

Song Han,

Yao Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Zhijian and Zhu, Ligeng and Shi, Baifeng and Zhang, Zhuoyang and Lou, Yuming and Yang, Shang and Xi, Haocheng and Cao, Shiyi and Gu, Yuxian and Li, Dacheng and Li, Xiuyu and Tang, Haotian and Fang, Yunhao and Chen, Yukang and Hsieh, Cheng-Yu and Huang, De-An and Cheng, An-Chieh and Hu, Jinyi and Liu, Sifei and Krishna, Ranjay and Molchanov, Pavlo and Kautz, Jan and Yin, Hongxu and Han, Song and Lu, Yao}, title = {{NVILA}: Efficient Frontier Visual Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4122--4134} }
SemiETS: Integrating Spatial and Content Consistencies for Semi-Supervised End-to-end Text Spotting: Dongliang Luo,

Hanshen Zhu,

Ziyang Zhang,

Dingkang Liang,

Xudong Xie,

Yuliang Liu,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_CVPR, author = {Luo, Dongliang and Zhu, Hanshen and Zhang, Ziyang and Liang, Dingkang and Xie, Xudong and Liu, Yuliang and Bai, Xiang}, title = {{SemiETS}: Integrating Spatial and Content Consistencies for Semi-Supervised End-to-end Text Spotting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9329--9338} }
See Further When Clear: Curriculum Consistency Model: Yunpeng Liu,

Boxiao Liu,

Yi Zhang,

Xingzhong Hou,

Guanglu Song,

Yu Liu,

Haihang You; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Yunpeng and Liu, Boxiao and Zhang, Yi and Hou, Xingzhong and Song, Guanglu and Liu, Yu and You, Haihang}, title = {See Further When Clear: Curriculum Consistency Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18103--18112} }
From Slow Bidirectional to Fast Autoregressive Video Diffusion Models: Tianwei Yin,

Qiang Zhang,

Richard Zhang,

William T. Freeman,

Fredo Durand,

Eli Shechtman,

Xun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR, author = {Yin, Tianwei and Zhang, Qiang and Zhang, Richard and Freeman, William T. and Durand, Fredo and Shechtman, Eli and Huang, Xun}, title = {From Slow Bidirectional to Fast Autoregressive Video Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22963--22974} }
PassionSR: Post-Training Quantization with Adaptive Scale in One-Step Diffusion based Image Super-Resolution: Libo Zhu,

Jianze Li,

Haotong Qin,

Wenbo Li,

Yulun Zhang,

Yong Guo,

Xiaokang Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR, author = {Zhu, Libo and Li, Jianze and Qin, Haotong and Li, Wenbo and Zhang, Yulun and Guo, Yong and Yang, Xiaokang}, title = {{PassionSR}: Post-Training Quantization with Adaptive Scale in One-Step Diffusion based Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12778--12788} }
RainyGS: Efficient Rain Synthesis with Physically-Based Gaussian Splatting: Qiyu Dai,

Xingyu Ni,

Qianfan Shen,

Wenzheng Chen,

Baoquan Chen,

Mengyu Chu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2025_CVPR, author = {Dai, Qiyu and Ni, Xingyu and Shen, Qianfan and Chen, Wenzheng and Chen, Baoquan and Chu, Mengyu}, title = {{RainyGS}: Efficient Rain Synthesis with Physically-Based {Gaussian} Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16153--16162} }
Noise Diffusion for Enhancing Semantic Faithfulness in Text-to-Image Synthesis: Boming Miao,

Chunxiao Li,

Xiaoxiao Wang,

Andi Zhang,

Rui Sun,

Zizhe Wang,

Yao Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Miao_2025_CVPR, author = {Miao, Boming and Li, Chunxiao and Wang, Xiaoxiao and Zhang, Andi and Sun, Rui and Wang, Zizhe and Zhu, Yao}, title = {Noise Diffusion for Enhancing Semantic Faithfulness in Text-to-Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23575--23584} }
MonoInstance: Enhancing Monocular Priors via Multi-view Instance Alignment for Neural Rendering and Reconstruction: Wenyuan Zhang,

Yixiao Yang,

Han Huang,

Liang Han,

Kanle Shi,

Yu-Shen Liu,

Zhizhong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Wenyuan and Yang, Yixiao and Huang, Han and Han, Liang and Shi, Kanle and Liu, Yu-Shen and Han, Zhizhong}, title = {{MonoInstance}: Enhancing Monocular Priors via Multi-view Instance Alignment for Neural Rendering and Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21642--21653} }
Three-view Focal Length Recovery From Homographies: Yaqing Ding,

Viktor Kocur,

Zuzana Berger Haladova,

Qianliang Wu,

Shen Cai,

Jian Yang,

Zuzana Kukelova; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ding_2025_CVPR, author = {Ding, Yaqing and Kocur, Viktor and Haladova, Zuzana Berger and Wu, Qianliang and Cai, Shen and Yang, Jian and Kukelova, Zuzana}, title = {Three-view Focal Length Recovery From Homographies}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11505--11514} }
NoPain: No-box Point Cloud Attack via Optimal Transport Singular Boundary: Zezeng Li,

Xiaoyu Du,

Na Lei,

Liming Chen,

Weimin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Zezeng and Du, Xiaoyu and Lei, Na and Chen, Liming and Wang, Weimin}, title = {{NoPain}: No-box Point Cloud Attack via Optimal Transport Singular Boundary}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {3492--3502} }
RAP: Retrieval-Augmented Personalization for Multimodal Large Language Models: Haoran Hao,

Jiaming Han,

Changsheng Li,

Yu-Feng Li,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hao_2025_CVPR, author = {Hao, Haoran and Han, Jiaming and Li, Changsheng and Li, Yu-Feng and Yue, Xiangyu}, title = {{RAP}: Retrieval-Augmented Personalization for Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14538--14548} }
FADA: Fast Diffusion Avatar Synthesis with Mixed-Supervised Multi-CFG Distillation: Tianyun Zhong,

Chao Liang,

Jianwen Jiang,

Gaojie Lin,

Jiaqi Yang,

Zhou Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_CVPR, author = {Zhong, Tianyun and Liang, Chao and Jiang, Jianwen and Lin, Gaojie and Yang, Jiaqi and Zhao, Zhou}, title = {{FADA}: Fast Diffusion Avatar Synthesis with Mixed-Supervised {Multi-CFG} Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {3101--3110} }
CAT4D: Create Anything in 4D with Multi-View Video Diffusion Models: Rundi Wu,

Ruiqi Gao,

Ben Poole,

Alex Trevithick,

Changxi Zheng,

Jonathan T. Barron,

Aleksander Holynski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR, author = {Wu, Rundi and Gao, Ruiqi and Poole, Ben and Trevithick, Alex and Zheng, Changxi and Barron, Jonathan T. and Holynski, Aleksander}, title = {{CAT4D}: Create Anything in {4D} with Multi-View Video Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26057--26068} }
Exploring Semantic Feature Discrimination for Perceptual Image Super-Resolution and Opinion-Unaware No-Reference Image Quality Assessment: Guanglu Dong,

Xiangyu Liao,

Mingyang Li,

Guihuan Guo,

Chao Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_CVPR, author = {Dong, Guanglu and Liao, Xiangyu and Li, Mingyang and Guo, Guihuan and Ren, Chao}, title = {Exploring Semantic Feature Discrimination for Perceptual Image Super-Resolution and Opinion-Unaware No-Reference Image Quality Assessment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28176--28187} }
Distilling Long-tailed Datasets: Zhenghao Zhao,

Haoxuan Wang,

Yuzhang Shang,

Kai Wang,

Yan Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR, author = {Zhao, Zhenghao and Wang, Haoxuan and Shang, Yuzhang and Wang, Kai and Yan, Yan}, title = {Distilling Long-tailed Datasets}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30609--30618} }
Gaze-LLE: Gaze Target Estimation via Large-Scale Learned Encoders: Fiona Ryan,

Ajay Bati,

Sangmin Lee,

Daniel Bolya,

Judy Hoffman,

James M. Rehg; [pdf] [supp]
[bibtex]
@InProceedings{Ryan_2025_CVPR, author = {Ryan, Fiona and Bati, Ajay and Lee, Sangmin and Bolya, Daniel and Hoffman, Judy and Rehg, James M.}, title = {{Gaze-LLE}: Gaze Target Estimation via Large-Scale Learned Encoders}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28874--28884} }
Distilling Spectral Graph for Object-Context Aware Open-Vocabulary Semantic Segmentation: Chanyoung Kim,

Dayun Ju,

Woojung Han,

Ming-Hsuan Yang,

Seong Jae Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR, author = {Kim, Chanyoung and Ju, Dayun and Han, Woojung and Yang, Ming-Hsuan and Hwang, Seong Jae}, title = {Distilling Spectral Graph for Object-Context Aware Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15033--15042} }
Incorporating Dense Knowledge Alignment into Unified Multimodal Representation Models: Yuhao Cui,

Xinxing Zu,

Wenhua Zhang,

Zhongzhou Zhao,

Jinyang Gao; [pdf] [supp]
[bibtex]
@InProceedings{Cui_2025_CVPR, author = {Cui, Yuhao and Zu, Xinxing and Zhang, Wenhua and Zhao, Zhongzhou and Gao, Jinyang}, title = {Incorporating Dense Knowledge Alignment into Unified Multimodal Representation Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29733--29743} }
Geometry Field Splatting with Gaussian Surfels: Kaiwen Jiang,

Venkataram Sivaram,

Cheng Peng,

Ravi Ramamoorthi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR, author = {Jiang, Kaiwen and Sivaram, Venkataram and Peng, Cheng and Ramamoorthi, Ravi}, title = {Geometry Field Splatting with {Gaussian} Surfels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5752--5762} }
Stereo4D: Learning How Things Move in 3D from Internet Stereo Videos: Linyi Jin,

Richard Tucker,

Zhengqi Li,

David Fouhey,

Noah Snavely,

Aleksander Holynski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2025_CVPR, author = {Jin, Linyi and Tucker, Richard and Li, Zhengqi and Fouhey, David and Snavely, Noah and Holynski, Aleksander}, title = {{Stereo4D}: Learning How Things Move in {3D} from Internet Stereo Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10497--10509} }
PS-EIP: Robust Photometric Stereo Based on Event Interval Profile: Kazuma Kitazawa,

Takahito Aoto,

Satoshi Ikehata,

Tsuyoshi Takatani; [pdf] [supp]
[bibtex]
@InProceedings{Kitazawa_2025_CVPR, author = {Kitazawa, Kazuma and Aoto, Takahito and Ikehata, Satoshi and Takatani, Tsuyoshi}, title = {{PS-EIP}: Robust Photometric Stereo Based on Event Interval Profile}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6241--6251} }
GenPC: Zero-shot Point Cloud Completion via 3D Generative Priors: An Li,

Zhe Zhu,

Mingqiang Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, An and Zhu, Zhe and Wei, Mingqiang}, title = {{GenPC}: Zero-shot Point Cloud Completion via {3D} Generative Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1308--1318} }
FoundHand: Large-Scale Domain-Specific Learning for Controllable Hand Image Generation: Kefan Chen,

Chaerin Min,

Linguang Zhang,

Shreyas Hampali,

Cem Keskin,

Srinath Sridhar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR, author = {Chen, Kefan and Min, Chaerin and Zhang, Linguang and Hampali, Shreyas and Keskin, Cem and Sridhar, Srinath}, title = {{FoundHand}: Large-Scale Domain-Specific Learning for Controllable Hand Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17448--17460} }
InterDyn: Controllable Interactive Dynamics with Video Diffusion Models: Rick Akkerman,

Haiwen Feng,

Michael J. Black,

Dimitrios Tzionas,

Victoria Fernández Abrevaya; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Akkerman_2025_CVPR, author = {Akkerman, Rick and Feng, Haiwen and Black, Michael J. and Tzionas, Dimitrios and Abrevaya, Victoria Fern{\'a}ndez}, title = {{InterDyn}: Controllable Interactive Dynamics with Video Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12467--12479} }
LLMDet: Learning Strong Open-Vocabulary Object Detectors under the Supervision of Large Language Models: Shenghao Fu,

Qize Yang,

Qijie Mo,

Junkai Yan,

Xihan Wei,

Jingke Meng,

Xiaohua Xie,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fu_2025_CVPR, author = {Fu, Shenghao and Yang, Qize and Mo, Qijie and Yan, Junkai and Wei, Xihan and Meng, Jingke and Xie, Xiaohua and Zheng, Wei-Shi}, title = {{LLMDet}: Learning Strong Open-Vocabulary Object Detectors under the Supervision of Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14987--14997} }
Boost Your Human Image Generation Model via Direct Preference Optimization: Sanghyeon Na,

Yonggyu Kim,

Hyunjoon Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Na_2025_CVPR, author = {Na, Sanghyeon and Kim, Yonggyu and Lee, Hyunjoon}, title = {Boost Your Human Image Generation Model via Direct Preference Optimization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23551--23562} }
Learning to Highlight Audio by Watching Movies: Chao Huang,

Ruohan Gao,

J. M. F. Tsang,

Jan Kurcius,

Cagdas Bilen,

Chenliang Xu,

Anurag Kumar,

Sanjeel Parekh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR, author = {Huang, Chao and Gao, Ruohan and Tsang, J. M. F. and Kurcius, Jan and Bilen, Cagdas and Xu, Chenliang and Kumar, Anurag and Parekh, Sanjeel}, title = {Learning to Highlight Audio by Watching Movies}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23925--23935} }
Unified Uncertainty-Aware Diffusion for Multi-Agent Trajectory Modeling: Guillem Capellera,

Antonio Rubio,

Luis Ferraz,

Antonio Agudo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Capellera_2025_CVPR, author = {Capellera, Guillem and Rubio, Antonio and Ferraz, Luis and Agudo, Antonio}, title = {Unified Uncertainty-Aware Diffusion for Multi-Agent Trajectory Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {22476--22486} }
WeGen: A Unified Model for Interactive Multimodal Generation as We Chat: Zhipeng Huang,

Shaobin Zhuang,

Canmiao Fu,

Binxin Yang,

Ying Zhang,

Chong Sun,

Zhizheng Zhang,

Yali Wang,

Chen Li,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR, author = {Huang, Zhipeng and Zhuang, Shaobin and Fu, Canmiao and Yang, Binxin and Zhang, Ying and Sun, Chong and Zhang, Zhizheng and Wang, Yali and Li, Chen and Zha, Zheng-Jun}, title = {{WeGen}: A Unified Model for Interactive Multimodal Generation as We Chat}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23679--23689} }
HRAvatar: High-Quality and Relightable Gaussian Head Avatar: Dongbin Zhang,

Yunfei Liu,

Lijian Lin,

Ye Zhu,

Kangjie Chen,

Minghan Qin,

Yu Li,

Haoqian Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Dongbin and Liu, Yunfei and Lin, Lijian and Zhu, Ye and Chen, Kangjie and Qin, Minghan and Li, Yu and Wang, Haoqian}, title = {{HRAvatar}: High-Quality and Relightable {Gaussian} Head Avatar}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26285--26296} }
Latent Drifting in Diffusion Models for Counterfactual Medical Image Synthesis: Yousef Yeganeh,

Azade Farshad,

Ioannis Charisiadis,

Marta Hasny,

Martin Hartenberger,

Björn Ommer,

Nassir Navab,

Ehsan Adeli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yeganeh_2025_CVPR, author = {Yeganeh, Yousef and Farshad, Azade and Charisiadis, Ioannis and Hasny, Marta and Hartenberger, Martin and Ommer, Bj{\"o}rn and Navab, Nassir and Adeli, Ehsan}, title = {Latent Drifting in Diffusion Models for Counterfactual Medical Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7685--7695} }
Rethinking Spiking Self-Attention Mechanism: Implementing a-XNOR Similarity Calculation in Spiking Transformers: Yichen Xiao,

Shuai Wang,

Dehao Zhang,

Wenjie Wei,

Yimeng Shan,

Xiaoli Liu,

Yulin Jiang,

Malu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2025_CVPR, author = {Xiao, Yichen and Wang, Shuai and Zhang, Dehao and Wei, Wenjie and Shan, Yimeng and Liu, Xiaoli and Jiang, Yulin and Zhang, Malu}, title = {Rethinking Spiking Self-Attention Mechanism: Implementing {a-XNOR} Similarity Calculation in Spiking Transformers}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {5444--5454} }
MagicQuill: An Intelligent Interactive Image Editing System: Zichen Liu,

Yue Yu,

Hao Ouyang,

Qiuyu Wang,

Ka Leong Cheng,

Wen Wang,

Zhiheng Liu,

Qifeng Chen,

Yujun Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Zichen and Yu, Yue and Ouyang, Hao and Wang, Qiuyu and Cheng, Ka Leong and Wang, Wen and Liu, Zhiheng and Chen, Qifeng and Shen, Yujun}, title = {{MagicQuill}: An Intelligent Interactive Image Editing System}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13072--13082} }
HeMoRa: Unsupervised Heuristic Consensus Sampling for Robust Point Cloud Registration: Shaocheng Yan,

Yiming Wang,

Kaiyan Zhao,

Pengcheng Shi,

Zhenjun Zhao,

Yongjun Zhang,

Jiayuan Li; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2025_CVPR, author = {Yan, Shaocheng and Wang, Yiming and Zhao, Kaiyan and Shi, Pengcheng and Zhao, Zhenjun and Zhang, Yongjun and Li, Jiayuan}, title = {{HeMoRa}: Unsupervised Heuristic Consensus Sampling for Robust Point Cloud Registration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1363--1373} }
Reducing Class-wise Confusion for Incremental Learning with Disentangled Manifolds: Huitong Chen,

Yu Wang,

Yan Fan,

Guosong Jiang,

Qinghua Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR, author = {Chen, Huitong and Wang, Yu and Fan, Yan and Jiang, Guosong and Hu, Qinghua}, title = {Reducing Class-wise Confusion for Incremental Learning with Disentangled Manifolds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10121--10130} }
Open-Vocabulary Functional 3D Scene Graphs for Real-World Indoor Spaces: Chenyangguang Zhang,

Alexandros Delitzas,

Fangjinhua Wang,

Ruida Zhang,

Xiangyang Ji,

Marc Pollefeys,

Francis Engelmann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Chenyangguang and Delitzas, Alexandros and Wang, Fangjinhua and Zhang, Ruida and Ji, Xiangyang and Pollefeys, Marc and Engelmann, Francis}, title = {Open-Vocabulary Functional {3D} Scene Graphs for Real-World Indoor Spaces}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19401--19413} }
Boosting Adversarial Transferability through Augmentation in Hypothesis Space: Yu Guo,

Weiquan Liu,

Qingshan Xu,

Shijun Zheng,

Shujun Huang,

Yu Zang,

Siqi Shen,

Chenglu Wen,

Cheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_CVPR, author = {Guo, Yu and Liu, Weiquan and Xu, Qingshan and Zheng, Shijun and Huang, Shujun and Zang, Yu and Shen, Siqi and Wen, Chenglu and Wang, Cheng}, title = {Boosting Adversarial Transferability through Augmentation in Hypothesis Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19175--19185} }
AniMo: Species-Aware Model for Text-Driven Animal Motion Generation: Xuan Wang,

Kai Ruan,

Xing Zhang,

Gaoang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Xuan and Ruan, Kai and Zhang, Xing and Wang, Gaoang}, title = {{AniMo}: Species-Aware Model for Text-Driven Animal Motion Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1929--1939} }
EditAR: Unified Conditional Generation with Autoregressive Models: Jiteng Mu,

Nuno Vasconcelos,

Xiaolong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mu_2025_CVPR,
  author    = {Mu, Jiteng and Vasconcelos, Nuno and Wang, Xiaolong},
  title     = {{EditAR}: Unified Conditional Generation with Autoregressive Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7899--7909}
}
Instance-wise Supervision-level Optimization in Active Learning: Shinnosuke Matsuo,

Riku Togashi,

Ryoma Bise,

Seiichi Uchida,

Masahiro Nomura; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Matsuo_2025_CVPR,
  author    = {Matsuo, Shinnosuke and Togashi, Riku and Bise, Ryoma and Uchida, Seiichi and Nomura, Masahiro},
  title     = {Instance-wise Supervision-level Optimization in Active Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4939--4947}
}
ViiNeuS: Volumetric Initialization for Implicit Neural Surface Reconstruction of Urban Scenes with Limited Image Overlap: Hala Djeghim,

Nathan Piasco,

Moussab Bennehar,

Luis Roldao,

Dzmitry Tsishkou,

Désiré Sidibé; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Djeghim_2025_CVPR,
  author    = {Djeghim, Hala and Piasco, Nathan and Bennehar, Moussab and Roldao, Luis and Tsishkou, Dzmitry and Sidib{\'e}, D{\'e}sir{\'e}},
  title     = {{ViiNeuS}: Volumetric Initialization for Implicit Neural Surface Reconstruction of Urban Scenes with Limited Image Overlap},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11854--11863}
}
Model Diagnosis and Correction via Linguistic and Implicit Attribute Editing: Xuanbai Chen,

Xiang Xu,

Zhihua Li,

Tianchen Zhao,

Pietro Perona,

Qin Zhang,

Yifan Xing; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Xuanbai and Xu, Xiang and Li, Zhihua and Zhao, Tianchen and Perona, Pietro and Zhang, Qin and Xing, Yifan},
  title     = {Model Diagnosis and Correction via Linguistic and Implicit Attribute Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14281--14292}
}
BHViT: Binarized Hybrid Vision Transformer: Tian Gao,

Yu Zhang,

Zhiyuan Zhang,

Huajun Liu,

Kaijie Yin,

Chengzhong Xu,

Hui Kong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Tian and Zhang, Yu and Zhang, Zhiyuan and Liu, Huajun and Yin, Kaijie and Xu, Chengzhong and Kong, Hui},
  title     = {{BHViT}: Binarized Hybrid Vision Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3563--3572}
}
UniPhy: Learning a Unified Constitutive Model for Inverse Physics Simulation: Himangi Mittal,

Peiye Zhuang,

Hsin-Ying Lee,

Shubham Tulsiani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mittal_2025_CVPR,
  author    = {Mittal, Himangi and Zhuang, Peiye and Lee, Hsin-Ying and Tulsiani, Shubham},
  title     = {{UniPhy}: Learning a Unified Constitutive Model for Inverse Physics Simulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16208--16218}
}
STAA-SNN: Spatial-Temporal Attention Aggregator for Spiking Neural Networks: Tianqing Zhang,

Kairong Yu,

Xian Zhong,

Hongwei Wang,

Qi Xu,

Qiang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Tianqing and Yu, Kairong and Zhong, Xian and Wang, Hongwei and Xu, Qi and Zhang, Qiang},
  title     = {{STAA-SNN}: Spatial-Temporal Attention Aggregator for Spiking Neural Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13959--13969}
}
Pathways on the Image Manifold: Image Editing via Video Generation: Noam Rotstein,

Gal Yona,

Daniel Silver,

Roy Velich,

David Bensaid,

Ron Kimmel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rotstein_2025_CVPR,
  author    = {Rotstein, Noam and Yona, Gal and Silver, Daniel and Velich, Roy and Bensaid, David and Kimmel, Ron},
  title     = {Pathways on the Image Manifold: Image Editing via Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7857--7866}
}
DeSplat: Decomposed Gaussian Splatting for Distractor-Free Rendering: Yihao Wang,

Marcus Klasson,

Matias Turkulainen,

Shuzhe Wang,

Juho Kannala,

Arno Solin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yihao and Klasson, Marcus and Turkulainen, Matias and Wang, Shuzhe and Kannala, Juho and Solin, Arno},
  title     = {{DeSplat}: Decomposed {Gaussian} Splatting for Distractor-Free Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {722--732}
}
Knowledge Memorization and Rumination for Pre-trained Model-based Class-Incremental Learning: Zijian Gao,

Wangwang Jia,

Xingxing Zhang,

Dulan Zhou,

Kele Xu,

Feng Dawei,

Yong Dou,

Xinjun Mao,

Huaimin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Zijian and Jia, Wangwang and Zhang, Xingxing and Zhou, Dulan and Xu, Kele and Feng, Dawei and Dou, Yong and Mao, Xinjun and Wang, Huaimin},
  title     = {Knowledge Memorization and Rumination for Pre-trained Model-based Class-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20523--20533}
}
A Distractor-Aware Memory for Visual Object Tracking with SAM2: Jovana Videnovic,

Alan Lukezic,

Matej Kristan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Videnovic_2025_CVPR,
  author    = {Videnovic, Jovana and Lukezic, Alan and Kristan, Matej},
  title     = {A Distractor-Aware Memory for Visual Object Tracking with {SAM2}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24255--24264}
}
Activating Sparse Part Concepts for 3D Class Incremental Learning: Zhenya Tian,

Jun Xiao,

Lupeng Liu,

Haiyong Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Zhenya and Xiao, Jun and Liu, Lupeng and Jiang, Haiyong},
  title     = {Activating Sparse Part Concepts for {3D} Class Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30343--30353}
}
ProxyTransformation: Preshaping Point Cloud Manifold With Proxy Attention For 3D Visual Grounding: Qihang Peng,

Henry Zheng,

Gao Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_CVPR,
  author    = {Peng, Qihang and Zheng, Henry and Huang, Gao},
  title     = {{ProxyTransformation}: Preshaping Point Cloud Manifold With Proxy Attention For {3D} Visual Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24582--24592}
}
BFANet: Revisiting 3D Semantic Segmentation with Boundary Feature Analysis: Weiguang Zhao,

Rui Zhang,

Qiufeng Wang,

Guangliang Cheng,

Kaizhu Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Weiguang and Zhang, Rui and Wang, Qiufeng and Cheng, Guangliang and Huang, Kaizhu},
  title     = {{BFANet}: Revisiting {3D} Semantic Segmentation with Boundary Feature Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29395--29405}
}
Stable Flow: Vital Layers for Training-Free Image Editing: Omri Avrahami,

Or Patashnik,

Ohad Fried,

Egor Nemchinov,

Kfir Aberman,

Dani Lischinski,

Daniel Cohen-Or; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Avrahami_2025_CVPR,
  author    = {Avrahami, Omri and Patashnik, Or and Fried, Ohad and Nemchinov, Egor and Aberman, Kfir and Lischinski, Dani and Cohen-Or, Daniel},
  title     = {Stable Flow: Vital Layers for Training-Free Image Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7877--7888}
}
Video-ColBERT: Contextualized Late Interaction for Text-to-Video Retrieval: Arun Reddy,

Alexander Martin,

Eugene Yang,

Andrew Yates,

Kate Sanders,

Kenton Murray,

Reno Kriz,

Celso M. de Melo,

Benjamin Van Durme,

Rama Chellappa; [pdf] [supp]
[bibtex]
@InProceedings{Reddy_2025_CVPR,
  author    = {Reddy, Arun and Martin, Alexander and Yang, Eugene and Yates, Andrew and Sanders, Kate and Murray, Kenton and Kriz, Reno and de Melo, Celso M. and Van Durme, Benjamin and Chellappa, Rama},
  title     = {{Video-ColBERT}: Contextualized Late Interaction for Text-to-Video Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19691--19701}
}
Beyond Words: Augmenting Discriminative Richness via Diffusions in Unsupervised Prompt Learning: Hairui Ren,

Fan Tang,

He Zhao,

Zixuan Wang,

Dandan Guo,

Yi Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Hairui and Tang, Fan and Zhao, He and Wang, Zixuan and Guo, Dandan and Chang, Yi},
  title     = {Beyond Words: Augmenting Discriminative Richness via Diffusions in Unsupervised Prompt Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25135--25144}
}
Unlocking the Potential of Unlabeled Data in Semi-Supervised Domain Generalization: Dongkwan Lee,

Kyomin Hwang,

Nojun Kwak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Dongkwan and Hwang, Kyomin and Kwak, Nojun},
  title     = {Unlocking the Potential of Unlabeled Data in Semi-Supervised Domain Generalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30599--30608}
}
TokenMotion: Decoupled Motion Control via Token Disentanglement for Human-centric Video Generation: Ruineng Li,

Daitao Xing,

Huiming Sun,

Yuanzhou Ha,

Jinglin Shen,

Chiuman Ho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Ruineng and Xing, Daitao and Sun, Huiming and Ha, Yuanzhou and Shen, Jinglin and Ho, Chiuman},
  title     = {{TokenMotion}: Decoupled Motion Control via Token Disentanglement for Human-centric Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1951--1961}
}
CholecTrack20: A Multi-Perspective Tracking Dataset for Surgical Tools: Chinedu Innocent Nwoye,

Kareem Elgohary,

Anvita Srinivas,

Fauzan Zaid,

Joël L. Lavanchy,

Nicolas Padoy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nwoye_2025_CVPR,
  author    = {Nwoye, Chinedu Innocent and Elgohary, Kareem and Srinivas, Anvita and Zaid, Fauzan and Lavanchy, Jo{\"e}l L. and Padoy, Nicolas},
  title     = {{CholecTrack20}: A Multi-Perspective Tracking Dataset for Surgical Tools},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8942--8952}
}
Visual and Semantic Prompt Collaboration for Generalized Zero-Shot Learning: Huajie Jiang,

Zhengxian Li,

Xiaohan Yu,

Yongli Hu,

Baocai Yin,

Jian Yang,

Yuankai Qi; [pdf] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Huajie and Li, Zhengxian and Yu, Xiaohan and Hu, Yongli and Yin, Baocai and Yang, Jian and Qi, Yuankai},
  title     = {Visual and Semantic Prompt Collaboration for Generalized Zero-Shot Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20275--20285}
}
Steering Away from Harm: An Adaptive Approach to Defending Vision Language Model Against Jailbreaks: Han Wang,

Gang Wang,

Huan Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Han and Wang, Gang and Zhang, Huan},
  title     = {Steering Away from Harm: An Adaptive Approach to Defending Vision Language Model Against Jailbreaks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29947--29957}
}
Neural LightRig: Unlocking Accurate Object Normal and Material Estimation with Multi-Light Diffusion: Zexin He,

Tengfei Wang,

Xin Huang,

Xingang Pan,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Zexin and Wang, Tengfei and Huang, Xin and Pan, Xingang and Liu, Ziwei},
  title     = {Neural {LightRig}: Unlocking Accurate Object Normal and Material Estimation with Multi-Light Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26514--26524}
}
VidMuse: A Simple Video-to-Music Generation Framework with Long-Short-Term Modeling: Zeyue Tian,

Zhaoyang Liu,

Ruibin Yuan,

Jiahao Pan,

Qifeng Liu,

Xu Tan,

Qifeng Chen,

Wei Xue,

Yike Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Zeyue and Liu, Zhaoyang and Yuan, Ruibin and Pan, Jiahao and Liu, Qifeng and Tan, Xu and Chen, Qifeng and Xue, Wei and Guo, Yike},
  title     = {{VidMuse}: A Simple Video-to-Music Generation Framework with Long-Short-Term Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18782--18793}
}
Human-centered Interactive Learning via MLLMs for Text-to-Image Person Re-identification: Yang Qin,

Chao Chen,

Zhihang Fu,

Dezhong Peng,

Xi Peng,

Peng Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2025_CVPR,
  author    = {Qin, Yang and Chen, Chao and Fu, Zhihang and Peng, Dezhong and Peng, Xi and Hu, Peng},
  title     = {Human-centered Interactive Learning via {MLLMs} for Text-to-Image Person Re-identification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14390--14399}
}
Conditional Balance: Improving Multi-Conditioning Trade-Offs in Image Generation: Nadav Z. Cohen,

Oron Nir,

Ariel Shamir; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cohen_2025_CVPR,
  author    = {Cohen, Nadav Z. and Nir, Oron and Shamir, Ariel},
  title     = {Conditional Balance: Improving Multi-Conditioning Trade-Offs in Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2641--2650}
}
KeyFace: Expressive Audio-Driven Facial Animation for Long Sequences via KeyFrame Interpolation: Antoni Bigata,

Michał Stypułkowski,

Rodrigo Mira,

Stella Bounareli,

Konstantinos Vougioukas,

Zoe Landgraf,

Nikita Drobyshev,

Maciej Zieba,

Stavros Petridis,

Maja Pantic; [pdf] [supp]
[bibtex]
@InProceedings{Bigata_2025_CVPR,
  author    = {Bigata, Antoni and Stypu{\l}kowski, Micha{\l} and Mira, Rodrigo and Bounareli, Stella and Vougioukas, Konstantinos and Landgraf, Zoe and Drobyshev, Nikita and Zieba, Maciej and Petridis, Stavros and Pantic, Maja},
  title     = {{KeyFace}: Expressive Audio-Driven Facial Animation for Long Sequences via {KeyFrame} Interpolation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5477--5488}
}
Context-Enhanced Memory-Refined Transformer for Online Action Detection: Zhanzhong Pang,

Fadime Sener,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pang_2025_CVPR,
  author    = {Pang, Zhanzhong and Sener, Fadime and Yao, Angela},
  title     = {Context-Enhanced Memory-Refined Transformer for Online Action Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8700--8710}
}
Towards Natural Language-Based Document Image Retrieval: New Dataset and Benchmark: Hao Guo,

Xugong Qin,

Jun Jie Ou Yang,

Peng Zhang,

Gangyan Zeng,

Yubo Li,

Hailun Lin; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Hao and Qin, Xugong and Yang, Jun Jie Ou and Zhang, Peng and Zeng, Gangyan and Li, Yubo and Lin, Hailun},
  title     = {Towards Natural Language-Based Document Image Retrieval: New Dataset and Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29722--29732}
}
Mitigating Ambiguities in 3D Classification with Gaussian Splatting: Ruiqi Zhang,

Hao Zhu,

Jingyi Zhao,

Qi Zhang,

Xun Cao,

Zhan Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Ruiqi and Zhu, Hao and Zhao, Jingyi and Zhang, Qi and Cao, Xun and Ma, Zhan},
  title     = {Mitigating Ambiguities in {3D} Classification with {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27275--27284}
}
Exposure-slot: Exposure-centric Representations Learning with Slot-in-Slot Attention for Region-aware Exposure Correction: Donggoo Jung,

Daehyun Kim,

Guanghui Wang,

Tae Hyun Kim; [pdf] [supp]
[bibtex]
@InProceedings{Jung_2025_CVPR,
  author    = {Jung, Donggoo and Kim, Daehyun and Wang, Guanghui and Kim, Tae Hyun},
  title     = {Exposure-slot: Exposure-centric Representations Learning with Slot-in-Slot Attention for Region-aware Exposure Correction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17892--17901}
}
Data-Free Group-Wise Fully Quantized Winograd Convolution via Learnable Scales: Shuokai Pan,

Gerti Tuzi,

Sudarshan Sreeram,

Dibakar Gope; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2025_CVPR,
  author    = {Pan, Shuokai and Tuzi, Gerti and Sreeram, Sudarshan and Gope, Dibakar},
  title     = {Data-Free Group-Wise Fully Quantized {Winograd} Convolution via Learnable Scales},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4091--4100}
}
EdgeDiff: Edge-aware Diffusion Network for Building Reconstruction from Point Clouds: Yujun Liu,

Ruisheng Wang,

Shangfeng Huang,

Guorong Cai; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yujun and Wang, Ruisheng and Huang, Shangfeng and Cai, Guorong},
  title     = {{EdgeDiff}: Edge-aware Diffusion Network for Building Reconstruction from Point Clouds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17008--17018}
}
GEN3C: 3D-Informed World-Consistent Video Generation with Precise Camera Control: Xuanchi Ren,

Tianchang Shen,

Jiahui Huang,

Huan Ling,

Yifan Lu,

Merlin Nimier-David,

Thomas Müller,

Alexander Keller,

Sanja Fidler,

Jun Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Xuanchi and Shen, Tianchang and Huang, Jiahui and Ling, Huan and Lu, Yifan and Nimier-David, Merlin and M{\"u}ller, Thomas and Keller, Alexander and Fidler, Sanja and Gao, Jun},
  title     = {{GEN3C}: {3D}-Informed World-Consistent Video Generation with Precise Camera Control},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6121--6132}
}
A Dataset for Semantic Segmentation in the Presence of Unknowns: Zakaria Laskar,

Tomas Vojir,

Matej Grcic,

Iaroslav Melekhov,

Shankar Gangisetty,

Juho Kannala,

Jiri Matas,

Giorgos Tolias,

C.V. Jawahar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Laskar_2025_CVPR,
  author    = {Laskar, Zakaria and Vojir, Tomas and Grcic, Matej and Melekhov, Iaroslav and Gangisetty, Shankar and Kannala, Juho and Matas, Jiri and Tolias, Giorgos and Jawahar, C. V.},
  title     = {A Dataset for Semantic Segmentation in the Presence of Unknowns},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1439--1448}
}
HierarQ: Task-Aware Hierarchical Q-Former for Enhanced Video Understanding: Shehreen Azad,

Vibhav Vineet,

Yogesh Singh Rawat; [pdf] [supp]
[bibtex]
@InProceedings{Azad_2025_CVPR,
  author    = {Azad, Shehreen and Vineet, Vibhav and Rawat, Yogesh Singh},
  title     = {{HierarQ}: Task-Aware Hierarchical {Q-Former} for Enhanced Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8545--8556}
}
DeNVeR: Deformable Neural Vessel Representations for Unsupervised Video Vessel Segmentation: Chun-Hung Wu,

Shih-Hong Chen,

Chih-Yao Hu,

Hsin-Yu Wu,

Kai-Hsin Chen,

Yu-You Chen,

Chih-Hai Su,

Chih-Kuo Lee,

Yu-Lun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Chun-Hung and Chen, Shih-Hong and Hu, Chih-Yao and Wu, Hsin-Yu and Chen, Kai-Hsin and Chen, Yu-You and Su, Chih-Hai and Lee, Chih-Kuo and Liu, Yu-Lun},
  title     = {{DeNVeR}: Deformable Neural Vessel Representations for Unsupervised Video Vessel Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15682--15692}
}
DH-Set: Improving Vision-Language Alignment with Diverse and Hybrid Set-Embeddings Learning: Kun Zhang,

Jingyu Li,

Zhe Li,

S. Kevin Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Kun and Li, Jingyu and Li, Zhe and Zhou, S. Kevin},
  title     = {{DH-Set}: Improving Vision-Language Alignment with Diverse and Hybrid Set-Embeddings Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24993--25003}
}
Task-Aware Clustering for Prompting Vision-Language Models: Fusheng Hao,

Fengxiang He,

Fuxiang Wu,

Tichao Wang,

Chengqun Song,

Jun Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Hao_2025_CVPR,
  author    = {Hao, Fusheng and He, Fengxiang and Wu, Fuxiang and Wang, Tichao and Song, Chengqun and Cheng, Jun},
  title     = {Task-Aware Clustering for Prompting Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14745--14755}
}
FSboard: Over 3 Million Characters of ASL Fingerspelling Collected via Smartphones: Manfred Georg,

Garrett Tanzer,

Esha Uboweja,

Saad Hassan,

Maximus Shengelia,

Sam Sepah,

Sean Forbes,

Thad Starner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Georg_2025_CVPR,
  author    = {Georg, Manfred and Tanzer, Garrett and Uboweja, Esha and Hassan, Saad and Shengelia, Maximus and Sepah, Sam and Forbes, Sean and Starner, Thad},
  title     = {{FSboard}: Over 3 Million Characters of {ASL} Fingerspelling Collected via Smartphones},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13897--13906}
}
CASP: Compression of Large Multimodal Models Based on Attention Sparsity: Mohsen Gholami,

Mohammad Akbari,

Kevin Cannons,

Yong Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gholami_2025_CVPR,
  author    = {Gholami, Mohsen and Akbari, Mohammad and Cannons, Kevin and Zhang, Yong},
  title     = {{CASP}: Compression of Large Multimodal Models Based on Attention Sparsity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9372--9381}
}
UNIC-Adapter: Unified Image-instruction Adapter with Multi-modal Transformer for Image Generation: Lunhao Duan,

Shanshan Zhao,

Wenjun Yan,

Yinglun Li,

Qing-Guo Chen,

Zhao Xu,

Weihua Luo,

Kaifu Zhang,

Mingming Gong,

Gui-Song Xia; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2025_CVPR,
  author    = {Duan, Lunhao and Zhao, Shanshan and Yan, Wenjun and Li, Yinglun and Chen, Qing-Guo and Xu, Zhao and Luo, Weihua and Zhang, Kaifu and Gong, Mingming and Xia, Gui-Song},
  title     = {{UNIC-Adapter}: Unified Image-instruction Adapter with Multi-modal Transformer for Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7963--7973}
}
Towards Cost-Effective Learning: A Synergy of Semi-Supervised and Active Learning: Tianxiang Yin,

Ningzhong Liu,

Han Sun; [pdf] [supp]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Tianxiang and Liu, Ningzhong and Sun, Han},
  title     = {Towards Cost-Effective Learning: A Synergy of Semi-Supervised and Active Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10163--10172}
}
Unveil Inversion and Invariance in Flow Transformer for Versatile Image Editing: Pengcheng Xu,

Boyuan Jiang,

Xiaobin Hu,

Donghao Luo,

Qingdong He,

Jiangning Zhang,

Chengjie Wang,

Yunsheng Wu,

Charles Ling,

Boyu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Pengcheng and Jiang, Boyuan and Hu, Xiaobin and Luo, Donghao and He, Qingdong and Zhang, Jiangning and Wang, Chengjie and Wu, Yunsheng and Ling, Charles and Wang, Boyu},
  title     = {Unveil Inversion and Invariance in Flow Transformer for Versatile Image Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28479--28489}
}
Light Transport-aware Diffusion Posterior Sampling for Single-View Reconstruction of 3D Volumes: Ludwic Leonard,

Nils Thurey,

Rüdiger Westermann; [pdf] [supp]
[bibtex]
@InProceedings{Leonard_2025_CVPR,
  author    = {Leonard, Ludwic and Thurey, Nils and Westermann, R{\"u}diger},
  title     = {Light Transport-aware Diffusion Posterior Sampling for Single-View Reconstruction of {3D} Volumes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16163--16174}
}
DyCON: Dynamic Uncertainty-aware Consistency and Contrastive Learning for Semi-supervised Medical Image Segmentation: Maregu Assefa,

Muzammal Naseer,

Iyyakutti Iyappan Ganapathi,

Syed Sadaf Ali,

Mohamed L. Seghier,

Naoufel Werghi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Assefa_2025_CVPR,
  author    = {Assefa, Maregu and Naseer, Muzammal and Ganapathi, Iyyakutti Iyappan and Ali, Syed Sadaf and Seghier, Mohamed L. and Werghi, Naoufel},
  title     = {{DyCON}: Dynamic Uncertainty-aware Consistency and Contrastive Learning for Semi-supervised Medical Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30850--30860}
}
STiL: Semi-supervised Tabular-Image Learning for Comprehensive Task-Relevant Information Exploration in Multimodal Classification: Siyi Du,

Xinzhe Luo,

Declan P. O'Regan,

Chen Qin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2025_CVPR,
  author    = {Du, Siyi and Luo, Xinzhe and O'Regan, Declan P. and Qin, Chen},
  title     = {{STiL}: Semi-supervised Tabular-Image Learning for Comprehensive Task-Relevant Information Exploration in Multimodal Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15549--15559}
}
DUNE: Distilling a Universal Encoder from Heterogeneous 2D and 3D Teachers: Mert Bülent Sarıyıldız,

Philippe Weinzaepfel,

Thomas Lucas,

Pau de Jorge,

Diane Larlus,

Yannis Kalantidis; [pdf] [supp]
[bibtex]
@InProceedings{Sariyildiz_2025_CVPR,
  author    = {Sar{\i}y{\i}ld{\i}z, Mert B{\"u}lent and Weinzaepfel, Philippe and Lucas, Thomas and de Jorge, Pau and Larlus, Diane and Kalantidis, Yannis},
  title     = {{DUNE}: Distilling a Universal Encoder from Heterogeneous {2D} and {3D} Teachers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30084--30094}
}
Black Hole-Driven Identity Absorbing in Diffusion Models: Muhammad Shaheryar,

Jong Taek Lee,

Soon Ki Jung; [pdf] [supp]
[bibtex]
@InProceedings{Shaheryar_2025_CVPR,
  author    = {Shaheryar, Muhammad and Lee, Jong Taek and Jung, Soon Ki},
  title     = {Black Hole-Driven Identity Absorbing in Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28544--28554}
}
HiRes-LLaVA: Restoring Fragmentation Input in High-Resolution Large Vision-Language Models: Runhui Huang,

Xinpeng Ding,

Chunwei Wang,

Jianhua Han,

Yulong Liu,

Hengshuang Zhao,

Hang Xu,

Lu Hou,

Wei Zhang,

Xiaodan Liang; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Runhui and Ding, Xinpeng and Wang, Chunwei and Han, Jianhua and Liu, Yulong and Zhao, Hengshuang and Xu, Hang and Hou, Lu and Zhang, Wei and Liang, Xiaodan},
  title     = {{HiRes-LLaVA}: Restoring Fragmentation Input in High-Resolution Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29814--29824}
}
Hallo3: Highly Dynamic and Realistic Portrait Image Animation with Video Diffusion Transformer: Jiahao Cui,

Hui Li,

Yun Zhan,

Hanlin Shang,

Kaihui Cheng,

Yuqi Ma,

Shan Mu,

Hang Zhou,

Jingdong Wang,

Siyu Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2025_CVPR,
  author    = {Cui, Jiahao and Li, Hui and Zhan, Yun and Shang, Hanlin and Cheng, Kaihui and Ma, Yuqi and Mu, Shan and Zhou, Hang and Wang, Jingdong and Zhu, Siyu},
  title     = {{Hallo3}: Highly Dynamic and Realistic Portrait Image Animation with Video Diffusion Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21086--21095}
}
Generative Photography: Scene-Consistent Camera Control for Realistic Text-to-Image Synthesis: Yu Yuan,

Xijun Wang,

Yichen Sheng,

Prateek Chennuri,

Xingguang Zhang,

Stanley Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Yu and Wang, Xijun and Sheng, Yichen and Chennuri, Prateek and Zhang, Xingguang and Chan, Stanley},
  title     = {Generative Photography: Scene-Consistent Camera Control for Realistic Text-to-Image Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7920--7930}
}
Advancing Manga Analysis: Comprehensive Segmentation Annotations for the Manga109 Dataset: Minshan Xie,

Jian Lin,

Hanyuan Liu,

Chengze Li,

Tien-Tsin Wong; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Minshan and Lin, Jian and Liu, Hanyuan and Li, Chengze and Wong, Tien-Tsin},
  title     = {Advancing Manga Analysis: Comprehensive Segmentation Annotations for the {Manga109} Dataset},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8869--8878}
}
SeqMvRL: A Sequential Fusion Framework for Multi-view Representation Learning: Ren Wang,

Haoliang Sun,

Yuxiu Lin,

Chuanhui Zuo,

Yongshun Gong,

Yilong Yin,

Wenjia Meng; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Ren and Sun, Haoliang and Lin, Yuxiu and Zuo, Chuanhui and Gong, Yongshun and Yin, Yilong and Meng, Wenjia},
  title     = {{SeqMvRL}: A Sequential Fusion Framework for Multi-view Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25822--25831}
}
Auto Cherry-Picker: Learning from High-quality Generative Data Driven by Language: Yicheng Chen,

Xiangtai Li,

Yining Li,

Yanhong Zeng,

Jianzong Wu,

Xiangyu Zhao,

Kai Chen; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Yicheng and Li, Xiangtai and Li, Yining and Zeng, Yanhong and Wu, Jianzong and Zhao, Xiangyu and Chen, Kai},
  title     = {Auto Cherry-Picker: Learning from High-quality Generative Data Driven by Language},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19952--19962}
}
EnvGS: Modeling View-Dependent Appearance with Environment Gaussian: Tao Xie,

Xi Chen,

Zhen Xu,

Yiman Xie,

Yudong Jin,

Yujun Shen,

Sida Peng,

Hujun Bao,

Xiaowei Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Tao and Chen, Xi and Xu, Zhen and Xie, Yiman and Jin, Yudong and Shen, Yujun and Peng, Sida and Bao, Hujun and Zhou, Xiaowei},
  title     = {{EnvGS}: Modeling View-Dependent Appearance with Environment {Gaussian}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5742--5751}
}
Provoking Multi-modal Few-Shot LVLM via Exploration-Exploitation In-Context Learning: Cheng Chen,

Yunpeng Zhai,

Yifan Zhao,

Jinyang Gao,

Bolin Ding,

Jia Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Cheng and Zhai, Yunpeng and Zhao, Yifan and Gao, Jinyang and Ding, Bolin and Li, Jia},
  title     = {Provoking Multi-modal Few-Shot {LVLM} via Exploration-Exploitation In-Context Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3826--3835}
}
BadToken: Token-level Backdoor Attacks to Multi-modal Large Language Models: Zenghui Yuan,

Jiawen Shi,

Pan Zhou,

Neil Zhenqiang Gong,

Lichao Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Zenghui and Shi, Jiawen and Zhou, Pan and Gong, Neil Zhenqiang and Sun, Lichao},
  title     = {{BadToken}: Token-level Backdoor Attacks to Multi-modal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29927--29936}
}
ReRAW: RGB-to-RAW Image Reconstruction via Stratified Sampling for Efficient Object Detection on the Edge: Radu Berdan,

Beril Besbinar,

Christoph Reinders,

Junji Otsuka,

Daisuke Iso; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Berdan_2025_CVPR,
  author    = {Berdan, Radu and Besbinar, Beril and Reinders, Christoph and Otsuka, Junji and Iso, Daisuke},
  title     = {{ReRAW}: {RGB-to-RAW} Image Reconstruction via Stratified Sampling for Efficient Object Detection on the Edge},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11833--11843}
}
VLMs-Guided Representation Distillation for Efficient Vision-Based Reinforcement Learning: Haoran Xu,

Peixi Peng,

Guang Tan,

Yiqian Chang,

Luntong Li,

Yonghong Tian; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Haoran and Peng, Peixi and Tan, Guang and Chang, Yiqian and Li, Luntong and Tian, Yonghong},
  title     = {{VLMs}-Guided Representation Distillation for Efficient Vision-Based Reinforcement Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29534--29544}
}
MonoDGP: Monocular 3D Object Detection with Decoupled-Query and Geometry-Error Priors: Fanqi Pu,

Yifan Wang,

Jiru Deng,

Wenming Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pu_2025_CVPR,
  author    = {Pu, Fanqi and Wang, Yifan and Deng, Jiru and Yang, Wenming},
  title     = {{MonoDGP}: Monocular {3D} Object Detection with Decoupled-Query and Geometry-Error Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6520--6530}
}
NeISF++: Neural Incident Stokes Field for Polarized Inverse Rendering of Conductors and Dielectrics: Chenhao Li,

Taishi Ono,

Takeshi Uemori,

Sho Nitta,

Hajime Mihara,

Alexander Gatto,

Hajime Nagahara,

Yusuke Moriuchi; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Chenhao and Ono, Taishi and Uemori, Takeshi and Nitta, Sho and Mihara, Hajime and Gatto, Alexander and Nagahara, Hajime and Moriuchi, Yusuke},
  title     = {{NeISF++}: Neural Incident {Stokes} Field for Polarized Inverse Rendering of Conductors and Dielectrics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26493--26503}
}
HunyuanPortrait: Implicit Condition Control for Enhanced Portrait Animation: Zunnan Xu,

Zhentao Yu,

Zixiang Zhou,

Jun Zhou,

Xiaoyu Jin,

Fa-ting Hong,

Xiaozhong Ji,

Junwei Zhu,

Chengfei Cai,

Shiyu Tang,

Qin Lin,

Xiu Li,

Qinglin Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Zunnan and Yu, Zhentao and Zhou, Zixiang and Zhou, Jun and Jin, Xiaoyu and Hong, Fa-ting and Ji, Xiaozhong and Zhu, Junwei and Cai, Chengfei and Tang, Shiyu and Lin, Qin and Li, Xiu and Lu, Qinglin},
  title     = {{HunyuanPortrait}: Implicit Condition Control for Enhanced Portrait Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15909--15919}
}
Flexible Group Count Enables Hassle-Free Structured Pruning: Jiamu Zhang,

Shaochen Zhong,

Andrew Ye,

Zirui Liu,

Sebastian Zhao,

Kaixiong Zhou,

Li Li,

Soo-Hyun Choi,

Rui Chen,

Xia Hu,

Shuai Xu,

Vipin Chaudhary; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jiamu and Zhong, Shaochen and Ye, Andrew and Liu, Zirui and Zhao, Sebastian and Zhou, Kaixiong and Li, Li and Choi, Soo-Hyun and Chen, Rui and Hu, Xia and Xu, Shuai and Chaudhary, Vipin},
  title     = {Flexible Group Count Enables Hassle-Free Structured Pruning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4807--4818}
}
EasyCraft: A Robust and Efficient Framework for Automatic Avatar Crafting: Suzhen Wang,

Weijie Chen,

Wei Zhang,

Minda Zhao,

Lincheng Li,

Rongsheng Zhang,

Zhipeng Hu,

Xin Yu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Suzhen and Chen, Weijie and Zhang, Wei and Zhao, Minda and Li, Lincheng and Zhang, Rongsheng and Hu, Zhipeng and Yu, Xin},
  title     = {{EasyCraft}: A Robust and Efficient Framework for Automatic Avatar Crafting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5581--5591}
}
MeshArt: Generating Articulated Meshes with Structure-Guided Transformers: Daoyi Gao,

Yawar Siddiqui,

Lei Li,

Angela Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Daoyi and Siddiqui, Yawar and Li, Lei and Dai, Angela},
  title     = {{MeshArt}: Generating Articulated Meshes with Structure-Guided Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {618--627}
}
Non-Natural Image Understanding with Advancing Frequency-based Vision Encoders: Wang Lin,

QingSong Wang,

Yueying Feng,

Shulei Wang,

Tao Jin,

Zhou Zhao,

Fei Wu,

Chang Yao,

Jingyuan Chen; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Wang and Wang, QingSong and Feng, Yueying and Wang, Shulei and Jin, Tao and Zhao, Zhou and Wu, Fei and Yao, Chang and Chen, Jingyuan},
  title     = {Non-Natural Image Understanding with Advancing Frequency-based Vision Encoders},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29756--29766}
}
Generative Multimodal Pretraining with Discrete Diffusion Timestep Tokens: Kaihang Pan,

Wang Lin,

Zhongqi Yue,

Tenglong Ao,

Liyu Jia,

Wei Zhao,

Juncheng Li,

Siliang Tang,

Hanwang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2025_CVPR,
  author    = {Pan, Kaihang and Lin, Wang and Yue, Zhongqi and Ao, Tenglong and Jia, Liyu and Zhao, Wei and Li, Juncheng and Tang, Siliang and Zhang, Hanwang},
  title     = {Generative Multimodal Pretraining with Discrete Diffusion Timestep Tokens},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26136--26146}
}
SplatFlow: Self-Supervised Dynamic Gaussian Splatting in Neural Motion Flow Field for Autonomous Driving: Su Sun,

Cheng Zhao,

Zhuoyang Sun,

Yingjie Victor Chen,

Mei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Su and Zhao, Cheng and Sun, Zhuoyang and Chen, Yingjie Victor and Chen, Mei},
  title     = {{SplatFlow}: Self-Supervised Dynamic {Gaussian} Splatting in Neural Motion Flow Field for Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27487--27496}
}
Zero-shot 3D Question Answering via Voxel-based Dynamic Token Compression: Hsiang-Wei Huang,

Fu-Chen Chen,

Wenhao Chai,

Che-Chun Su,

Lu Xia,

Sanghun Jung,

Cheng-Yen Yang,

Jenq-Neng Hwang,

Min Sun,

Cheng-Hao Kuo; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Hsiang-Wei and Chen, Fu-Chen and Chai, Wenhao and Su, Che-Chun and Xia, Lu and Jung, Sanghun and Yang, Cheng-Yen and Hwang, Jenq-Neng and Sun, Min and Kuo, Cheng-Hao},
  title     = {Zero-shot {3D} Question Answering via Voxel-based Dynamic Token Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19424--19434}
}
AesthetiQ: Enhancing Graphic Layout Design via Aesthetic-Aware Preference Alignment of Multi-modal Large Language Models: Sohan Patnaik,

Rishabh Jain,

Balaji Krishnamurthy,

Mausoom Sarkar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Patnaik_2025_CVPR,
  author    = {Patnaik, Sohan and Jain, Rishabh and Krishnamurthy, Balaji and Sarkar, Mausoom},
  title     = {{AesthetiQ}: Enhancing Graphic Layout Design via Aesthetic-Aware Preference Alignment of Multi-modal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23701--23711}
}
Enhanced then Progressive Fusion with View Graph for Multi-View Clustering: Zhibin Dong,

Meng Liu,

Siwei Wang,

Ke Liang,

Yi Zhang,

Suyuan Liu,

Jiaqi Jin,

Xinwang Liu,

En Zhu; [pdf]
[bibtex]
@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Zhibin and Liu, Meng and Wang, Siwei and Liang, Ke and Zhang, Yi and Liu, Suyuan and Jin, Jiaqi and Liu, Xinwang and Zhu, En},
  title     = {Enhanced then Progressive Fusion with View Graph for Multi-View Clustering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15518--15527}
}
FINECAPTION: Compositional Image Captioning Focusing on Wherever You Want at Any Granularity: Hang Hua,

Qing Liu,

Lingzhi Zhang,

Jing Shi,

Soo Ye Kim,

Zhifei Zhang,

Yilin Wang,

Jianming Zhang,

Zhe Lin,

Jiebo Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hua_2025_CVPR,
  author    = {Hua, Hang and Liu, Qing and Zhang, Lingzhi and Shi, Jing and Kim, Soo Ye and Zhang, Zhifei and Wang, Yilin and Zhang, Jianming and Lin, Zhe and Luo, Jiebo},
  title     = {{FINECAPTION}: Compositional Image Captioning Focusing on Wherever You Want at Any Granularity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24763--24773}
}
Adaptive Non-Uniform Timestep Sampling for Accelerating Diffusion Model Training: Myunsoo Kim,

Donghyeon Ki,

Seong-Woong Shim,

Byung-Jun Lee; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Myunsoo and Ki, Donghyeon and Shim, Seong-Woong and Lee, Byung-Jun},
  title     = {Adaptive Non-Uniform Timestep Sampling for Accelerating Diffusion Model Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2513--2522}
}
Chebyshev Attention Depth Permutation Texture Network with Latent Texture Attribute Loss: Ravishankar Evani,

Deepu Rajan,

Shangbo Mao; [pdf] [supp]
[bibtex]
@InProceedings{Evani_2025_CVPR,
  author    = {Evani, Ravishankar and Rajan, Deepu and Mao, Shangbo},
  title     = {Chebyshev Attention Depth Permutation Texture Network with Latent Texture Attribute Loss},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23423--23432}
}
Explainable Saliency: Articulating Reasoning with Contextual Prioritization: Nuo Chen,

Ming Jiang,

Qi Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Nuo and Jiang, Ming and Zhao, Qi},
  title     = {Explainable Saliency: Articulating Reasoning with Contextual Prioritization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9601--9610}
}
Decentralized Diffusion Models: David McAllister,

Matthew Tancik,

Jiaming Song,

Angjoo Kanazawa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{McAllister_2025_CVPR,
  author    = {McAllister, David and Tancik, Matthew and Song, Jiaming and Kanazawa, Angjoo},
  title     = {Decentralized Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23323--23333}
}
AnyEdit: Mastering Unified High-Quality Image Editing for Any Idea: Qifan Yu,

Wei Chow,

Zhongqi Yue,

Kaihang Pan,

Yang Wu,

Xiaoyang Wan,

Juncheng Li,

Siliang Tang,

Hanwang Zhang,

Yueting Zhuang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Qifan and Chow, Wei and Yue, Zhongqi and Pan, Kaihang and Wu, Yang and Wan, Xiaoyang and Li, Juncheng and Tang, Siliang and Zhang, Hanwang and Zhuang, Yueting},
  title     = {{AnyEdit}: Mastering Unified High-Quality Image Editing for Any Idea},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26125--26135}
}
Compass Control: Multi Object Orientation Control for Text-to-Image Generation: Rishubh Parihar,

Vaibhav Agrawal,

Sachidanand VS,

Venkatesh Babu Radhakrishnan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parihar_2025_CVPR,
  author    = {Parihar, Rishubh and Agrawal, Vaibhav and VS, Sachidanand and Radhakrishnan, Venkatesh Babu},
  title     = {Compass Control: Multi Object Orientation Control for Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2791--2801}
}
Correcting Deviations from Normality: A Reformulated Diffusion Model for Multi-Class Unsupervised Anomaly Detection: Farzad Beizaee,

Gregory A. Lodygensky,

Christian Desrosiers,

Jose Dolz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Beizaee_2025_CVPR,
  author    = {Beizaee, Farzad and Lodygensky, Gregory A. and Desrosiers, Christian and Dolz, Jose},
  title     = {Correcting Deviations from Normality: A Reformulated Diffusion Model for Multi-Class Unsupervised Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19088--19097}
}
Continuous 3D Perception Model with Persistent State: Qianqian Wang,

Yifei Zhang,

Aleksander Holynski,

Alexei A. Efros,

Angjoo Kanazawa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Qianqian and Zhang, Yifei and Holynski, Aleksander and Efros, Alexei A. and Kanazawa, Angjoo},
  title     = {Continuous {3D} Perception Model with Persistent State},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10510--10522}
}
LP-Diff: Towards Improved Restoration of Real-World Degraded License Plate: Haoyan Gong,

Zhenrong Zhang,

Yuzheng Feng,

Anh Nguyen,

Hongbin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Gong_2025_CVPR,
  author    = {Gong, Haoyan and Zhang, Zhenrong and Feng, Yuzheng and Nguyen, Anh and Liu, Hongbin},
  title     = {{LP-Diff}: Towards Improved Restoration of Real-World Degraded License Plate},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17831--17840}
}
Unleashing the Potential of Consistency Learning for Detecting and Grounding Multi-Modal Media Manipulation: Yiheng Li,

Yang Yang,

Zichang Tan,

Huan Liu,

Weihua Chen,

Xu Zhou,

Zhen Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yiheng and Yang, Yang and Tan, Zichang and Liu, Huan and Chen, Weihua and Zhou, Xu and Lei, Zhen},
  title     = {Unleashing the Potential of Consistency Learning for Detecting and Grounding Multi-Modal Media Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9242--9252}
}
DNF: Unconditional 4D Generation with Dictionary-based Neural Fields: Xinyi Zhang,

Naiqi Li,

Angela Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xinyi and Li, Naiqi and Dai, Angela},
  title     = {{DNF}: Unconditional {4D} Generation with Dictionary-based Neural Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26047--26056}
}
ARM: Appearance Reconstruction Model for Relightable 3D Generation: Xiang Feng,

Chang Yu,

Zoubin Bi,

Yintong Shang,

Feng Gao,

Hongzhi Wu,

Kun Zhou,

Chenfanfu Jiang,

Yin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Xiang and Yu, Chang and Bi, Zoubin and Shang, Yintong and Gao, Feng and Wu, Hongzhi and Zhou, Kun and Jiang, Chenfanfu and Yang, Yin},
  title     = {{ARM}: Appearance Reconstruction Model for Relightable {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21425--21437}
}
VideoGEM: Training-free Action Grounding in Videos: Felix Vogel,

Walid Bousselham,

Anna Kukleva,

Nina Shvetsova,

Hilde Kuehne; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vogel_2025_CVPR,
  author    = {Vogel, Felix and Bousselham, Walid and Kukleva, Anna and Shvetsova, Nina and Kuehne, Hilde},
  title     = {{VideoGEM}: Training-free Action Grounding in Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3374--3383}
}
FilmComposer: LLM-Driven Music Production for Silent Film Clips: Zhifeng Xie,

Qile He,

Youjia Zhu,

Qiwei He,

Mengtian Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Zhifeng and He, Qile and Zhu, Youjia and He, Qiwei and Li, Mengtian},
  title     = {{FilmComposer}: {LLM}-Driven Music Production for Silent Film Clips},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13519--13528}
}
Ground-V: Teaching VLMs to Ground Complex Instructions in Pixels: Yongshuo Zong,

Qin Zhang,

Dongsheng An,

Zhihua Li,

Xiang Xu,

Linghan Xu,

Zhuowen Tu,

Yifan Xing,

Onkar Dabeer; [pdf] [supp]
[bibtex]
@InProceedings{Zong_2025_CVPR,
  author    = {Zong, Yongshuo and Zhang, Qin and An, Dongsheng and Li, Zhihua and Xu, Xiang and Xu, Linghan and Tu, Zhuowen and Xing, Yifan and Dabeer, Onkar},
  title     = {{Ground-V}: Teaching {VLMs} to Ground Complex Instructions in Pixels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24635--24645}
}
Structure-from-Motion with a Non-Parametric Camera Model: Yihan Wang,

Linfei Pan,

Marc Pollefeys,

Viktor Larsson; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yihan and Pan, Linfei and Pollefeys, Marc and Larsson, Viktor},
  title     = {Structure-from-Motion with a Non-Parametric Camera Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1040--1049}
}
EventPSR: Surface Normal and Reflectance Estimation from Photometric Stereo Using an Event Camera: Bohan Yu,

Jin Han,

Boxin Shi,

Imari Sato; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Bohan and Han, Jin and Shi, Boxin and Sato, Imari},
  title     = {{EventPSR}: Surface Normal and Reflectance Estimation from Photometric Stereo Using an Event Camera},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11427--11436}
}
LAL: Enhancing 3D Human Motion Prediction with Latency-aware Auxiliary Learning: Xiaoning Sun,

Dong Wei,

Huaijiang Sun,

Shengxiang Hu; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Xiaoning and Wei, Dong and Sun, Huaijiang and Hu, Shengxiang},
  title     = {{LAL}: Enhancing {3D} Human Motion Prediction with Latency-aware Auxiliary Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7105--7114}
}
CASP: Consistency-aware Audio-induced Saliency Prediction Model for Omnidirectional Video: Zhaolin Wan,

Han Qin,

Zhiyang Li,

Xiaopeng Fan,

Wangmeng Zuo,

Debin Zhao; [pdf]
[bibtex]
@InProceedings{Wan_2025_CVPR,
  author    = {Wan, Zhaolin and Qin, Han and Li, Zhiyang and Fan, Xiaopeng and Zuo, Wangmeng and Zhao, Debin},
  title     = {{CASP}: Consistency-aware Audio-induced Saliency Prediction Model for Omnidirectional Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12605--12614}
}
TreeMeshGPT: Artistic Mesh Generation with Autoregressive Tree Sequencing: Stefan Lionar,

Jiabin Liang,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lionar_2025_CVPR,
  author    = {Lionar, Stefan and Liang, Jiabin and Lee, Gim Hee},
  title     = {{TreeMeshGPT}: Artistic Mesh Generation with Autoregressive Tree Sequencing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26608--26617}
}
RefPose: Leveraging Reference Geometric Correspondences for Accurate 6D Pose Estimation of Unseen Objects: Jaeguk Kim,

Jaewoo Park,

Keuntek Lee,

Nam Ik Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Jaeguk and Park, Jaewoo and Lee, Keuntek and Cho, Nam Ik},
  title     = {{RefPose}: Leveraging Reference Geometric Correspondences for Accurate {6D} Pose Estimation of Unseen Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6447--6456}
}
Relation3D : Enhancing Relation Modeling for Point Cloud Instance Segmentation: Jiahao Lu,

Jiacheng Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Jiahao and Deng, Jiacheng},
  title     = {{Relation3D} : Enhancing Relation Modeling for Point Cloud Instance Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8889--8899}
}
Shape My Moves: Text-Driven Shape-Aware Synthesis of Human Motions: Ting-Hsuan Liao,

Yi Zhou,

Yu Shen,

Chun-Hao Paul Huang,

Saayan Mitra,

Jia-Bin Huang,

Uttaran Bhattacharya; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2025_CVPR,
  author    = {Liao, Ting-Hsuan and Zhou, Yi and Shen, Yu and Huang, Chun-Hao Paul and Mitra, Saayan and Huang, Jia-Bin and Bhattacharya, Uttaran},
  title     = {Shape My Moves: Text-Driven Shape-Aware Synthesis of Human Motions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1917--1928}
}
Generating 3D-Consistent Videos from Unposed Internet Photos: Gene Chou,

Kai Zhang,

Sai Bi,

Hao Tan,

Zexiang Xu,

Fujun Luan,

Bharath Hariharan,

Noah Snavely; [pdf] [arXiv]
[bibtex]
@InProceedings{Chou_2025_CVPR,
  author    = {Chou, Gene and Zhang, Kai and Bi, Sai and Tan, Hao and Xu, Zexiang and Luan, Fujun and Hariharan, Bharath and Snavely, Noah},
  title     = {Generating {3D}-Consistent Videos from Unposed Internet Photos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27934--27945}
}
Gazing at Rewards: Eye Movements as a Lens into Human and AI Decision-Making in Hybrid Visual Foraging: Bo Wang,

Dingwei Tan,

Yen-Ling Kuo,

Zhaowei Sun,

Jeremy M. Wolfe,

Tat-Jen Cham,

Mengmi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Bo and Tan, Dingwei and Kuo, Yen-Ling and Sun, Zhaowei and Wolfe, Jeremy M. and Cham, Tat-Jen and Zhang, Mengmi},
  title     = {Gazing at Rewards: Eye Movements as a Lens into Human and {AI} Decision-Making in Hybrid Visual Foraging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14810--14823}
}
FOCUS: Knowledge-enhanced Adaptive Visual Compression for Few-shot Whole Slide Image Classification: Zhengrui Guo,

Conghao Xiong,

Jiabo Ma,

Qichen Sun,

Lishuang Feng,

Jinzhuo Wang,

Hao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Zhengrui and Xiong, Conghao and Ma, Jiabo and Sun, Qichen and Feng, Lishuang and Wang, Jinzhuo and Chen, Hao},
  title     = {{FOCUS}: Knowledge-enhanced Adaptive Visual Compression for Few-shot Whole Slide Image Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15590--15600}
}
Beyond Human Perception: Understanding Multi-Object World from Monocular View: Keyu Guo,

Yongle Huang,

Shijie Sun,

Xiangyu Song,

Mingtao Feng,

Zedong Liu,

Huansheng Song,

Tiantian Wang,

Jianxin Li,

Naveed Akhtar,

Ajmal Saeed Mian; [pdf]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Keyu and Huang, Yongle and Sun, Shijie and Song, Xiangyu and Feng, Mingtao and Liu, Zedong and Song, Huansheng and Wang, Tiantian and Li, Jianxin and Akhtar, Naveed and Mian, Ajmal Saeed},
  title     = {Beyond Human Perception: Understanding Multi-Object World from Monocular View},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3751--3760}
}
GRAE-3DMOT: Geometry Relation-Aware Encoder for Online 3D Multi-Object Tracking: Hyunseop Kim,

Hyo-Jun Lee,

Yonguk Lee,

Jinu Lee,

Hanul Kim,

Yeong Jun Koh; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Hyunseop and Lee, Hyo-Jun and Lee, Yonguk and Lee, Jinu and Kim, Hanul and Koh, Yeong Jun},
  title     = {{GRAE-3DMOT}: Geometry Relation-Aware Encoder for Online {3D} Multi-Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11697--11706}
}
Automatic Joint Structured Pruning and Quantization for Efficient Neural Network Training and Compression: Xiaoyi Qu,

David Aponte,

Colby Banbury,

Daniel P. Robinson,

Tianyu Ding,

Kazuhito Koishida,

Ilya Zharkov,

Tianyi Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_CVPR,
  author    = {Qu, Xiaoyi and Aponte, David and Banbury, Colby and Robinson, Daniel P. and Ding, Tianyu and Koishida, Kazuhito and Zharkov, Ilya and Chen, Tianyi},
  title     = {Automatic Joint Structured Pruning and Quantization for Efficient Neural Network Training and Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15234--15244}
}
Parameter Efficient Mamba Tuning via Projector-targeted Diagonal-centric Linear Transformation: Seokil Ham,

Hee-Seon Kim,

Sangmin Woo,

Changick Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ham_2025_CVPR,
  author    = {Ham, Seokil and Kim, Hee-Seon and Woo, Sangmin and Kim, Changick},
  title     = {Parameter Efficient {Mamba} Tuning via Projector-targeted Diagonal-centric Linear Transformation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30106--30115}
}
ViUniT: Visual Unit Tests for More Robust Visual Programming: Artemis Panagopoulou,

Honglu Zhou,

Silvio Savarese,

Caiming Xiong,

Chris Callison-Burch,

Mark Yatskar,

Juan Carlos Niebles; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Panagopoulou_2025_CVPR,
  author    = {Panagopoulou, Artemis and Zhou, Honglu and Savarese, Silvio and Xiong, Caiming and Callison-Burch, Chris and Yatskar, Mark and Niebles, Juan Carlos},
  title     = {{ViUniT}: Visual Unit Tests for More Robust Visual Programming},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24646--24656}
}
LIRM: Large Inverse Rendering Model for Progressive Reconstruction of Shape, Materials and View-dependent Radiance Fields: Zhengqin Li,

Dilin Wang,

Ka Chen,

Zhaoyang Lv,

Thu Nguyen-Phuoc,

Milim Lee,

Jia-Bin Huang,

Lei Xiao,

Yufeng Zhu,

Carl S. Marshall,

Yuheng Ren,

Richard Newcombe,

Zhao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zhengqin and Wang, Dilin and Chen, Ka and Lv, Zhaoyang and Nguyen-Phuoc, Thu and Lee, Milim and Huang, Jia-Bin and Xiao, Lei and Zhu, Yufeng and Marshall, Carl S. and Ren, Yuheng and Newcombe, Richard and Dong, Zhao},
  title     = {{LIRM}: Large Inverse Rendering Model for Progressive Reconstruction of Shape, Materials and View-dependent Radiance Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {505--517}
}
DualTalk: Dual-Speaker Interaction for 3D Talking Head Conversations: Ziqiao Peng,

Yanbo Fan,

Haoyu Wu,

Xuan Wang,

Hongyan Liu,

Jun He,

Zhaoxin Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_CVPR,
  author    = {Peng, Ziqiao and Fan, Yanbo and Wu, Haoyu and Wang, Xuan and Liu, Hongyan and He, Jun and Fan, Zhaoxin},
  title     = {{DualTalk}: Dual-Speaker Interaction for {3D} Talking Head Conversations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21055--21064}
}
MAR-3D: Progressive Masked Auto-regressor for High-Resolution 3D Generation: Jinnan Chen,

Lingting Zhu,

Zeyu Hu,

Shengju Qian,

Yugang Chen,

Xin Wang,

Gim Hee Lee; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jinnan and Zhu, Lingting and Hu, Zeyu and Qian, Shengju and Chen, Yugang and Wang, Xin and Lee, Gim Hee},
  title     = {{MAR-3D}: Progressive Masked Auto-regressor for High-Resolution {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11083--11092}
}
beta-FFT: Nonlinear Interpolation and Differentiated Training Strategies for Semi-Supervised Medical Image Segmentation: Ming Hu,

Jianfu Yin,

Zhuangzhuang Ma,

Jianheng Ma,

Feiyu Zhu,

Bingbing Wu,

Ya Wen,

Meng Wu,

Cong Hu,

Bingliang Hu,

Quan Wang; [pdf]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Ming and Yin, Jianfu and Ma, Zhuangzhuang and Ma, Jianheng and Zhu, Feiyu and Wu, Bingbing and Wen, Ya and Wu, Meng and Hu, Cong and Hu, Bingliang and Wang, Quan},
  title     = {beta-{FFT}: Nonlinear Interpolation and Differentiated Training Strategies for Semi-Supervised Medical Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30839--30849}
}
Dynamic Group Normalization: Spatio-Temporal Adaptation to Evolving Data Statistics: Yair Smadar,

Assaf Hoogi; [pdf] [supp]
[bibtex]
@InProceedings{Smadar_2025_CVPR,
  author    = {Smadar, Yair and Hoogi, Assaf},
  title     = {Dynamic Group Normalization: Spatio-Temporal Adaptation to Evolving Data Statistics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30167--30177}
}
Latent Space Super-Resolution for Higher-Resolution Image Generation with Diffusion Models: Jinho Jeong,

Sangmin Han,

Jinwoo Kim,

Seon Joo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_CVPR,
  author    = {Jeong, Jinho and Han, Sangmin and Kim, Jinwoo and Kim, Seon Joo},
  title     = {Latent Space Super-Resolution for Higher-Resolution Image Generation with Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2355--2365},
}
SynerGen-VL: Towards Synergistic Image Understanding and Generation with Vision Experts and Token Folding: Hao Li,

Changyao Tian,

Jie Shao,

Xizhou Zhu,

Zhaokai Wang,

Jinguo Zhu,

Wenhan Dou,

Xiaogang Wang,

Hongsheng Li,

Lewei Lu,

Jifeng Dai; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Hao and Tian, Changyao and Shao, Jie and Zhu, Xizhou and Wang, Zhaokai and Zhu, Jinguo and Dou, Wenhan and Wang, Xiaogang and Li, Hongsheng and Lu, Lewei and Dai, Jifeng},
  title     = {{SynerGen-VL}: Towards Synergistic Image Understanding and Generation with Vision Experts and Token Folding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29767--29779},
}
Synthetic Prior for Few-Shot Drivable Head Avatar Inversion: Wojciech Zielonka,

Stephan J. Garbin,

Alexandros Lattas,

George Kopanas,

Paulo Gotardo,

Thabo Beeler,

Justus Thies,

Timo Bolkart; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zielonka_2025_CVPR,
  author    = {Zielonka, Wojciech and Garbin, Stephan J. and Lattas, Alexandros and Kopanas, George and Gotardo, Paulo and Beeler, Thabo and Thies, Justus and Bolkart, Timo},
  title     = {Synthetic Prior for Few-Shot Drivable Head Avatar Inversion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10735--10746},
}
Reasoning in Visual Navigation of End-to-end Trained Agents: A Dynamical Systems Approach: Steeven Janny,

Hervé Poirier,

Leonid Antsfeld,

Guillaume Bono,

Gianluca Monaci,

Boris Chidlovskii,

Francesco Giuliari,

Alessio Del Bue,

Christian Wolf; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Janny_2025_CVPR,
  author    = {Janny, Steeven and Poirier, Herv{\'e} and Antsfeld, Leonid and Bono, Guillaume and Monaci, Gianluca and Chidlovskii, Boris and Giuliari, Francesco and Del Bue, Alessio and Wolf, Christian},
  title     = {Reasoning in Visual Navigation of End-to-end Trained Agents: A Dynamical Systems Approach},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12111--12121},
}
Rethinking Noisy Video-Text Retrieval via Relation-aware Alignment: Huakai Lai,

Guoxin Xiong,

Huayu Mai,

Xiang Liu,

Tianzhu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Huakai and Xiong, Guoxin and Mai, Huayu and Liu, Xiang and Zhang, Tianzhu},
  title     = {Rethinking Noisy Video-Text Retrieval via Relation-aware Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9231--9241},
}
DFormerv2: Geometry Self-Attention for RGBD Semantic Segmentation: Bo-Wen Yin,

Jiao-Long Cao,

Ming-Ming Cheng,

Qibin Hou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Bo-Wen and Cao, Jiao-Long and Cheng, Ming-Ming and Hou, Qibin},
  title     = {{DFormerv2}: Geometry Self-Attention for {RGBD} Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19345--19355},
}
Scaling Vision Pre-Training to 4K Resolution: Baifeng Shi,

Boyi Li,

Han Cai,

Yao Lu,

Sifei Liu,

Marco Pavone,

Jan Kautz,

Song Han,

Trevor Darrell,

Pavlo Molchanov,

Hongxu Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Baifeng and Li, Boyi and Cai, Han and Lu, Yao and Liu, Sifei and Pavone, Marco and Kautz, Jan and Han, Song and Darrell, Trevor and Molchanov, Pavlo and Yin, Hongxu},
  title     = {Scaling Vision Pre-Training to {4K} Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9631--9640},
}
GarmentPile: Point-Level Visual Affordance Guided Retrieval and Adaptation for Cluttered Garments Manipulation: Ruihai Wu,

Ziyu Zhu,

Yuran Wang,

Yue Chen,

Jiarui Wang,

Hao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Ruihai and Zhu, Ziyu and Wang, Yuran and Chen, Yue and Wang, Jiarui and Dong, Hao},
  title     = {{GarmentPile}: Point-Level Visual Affordance Guided Retrieval and Adaptation for Cluttered Garments Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6950--6959},
}
Uncertain Multimodal Intention and Emotion Understanding in the Wild: Qu Yang,

Qinghongya Shi,

Tongxin Wang,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Qu and Shi, Qinghongya and Wang, Tongxin and Ye, Mang},
  title     = {Uncertain Multimodal Intention and Emotion Understanding in the Wild},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24700--24709},
}
GroomLight: Hybrid Inverse Rendering for Relightable Human Hair Appearance Modeling: Yang Zheng,

Menglei Chai,

Delio Vicini,

Yuxiao Zhou,

Yinghao Xu,

Leonidas Guibas,

Gordon Wetzstein,

Thabo Beeler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Yang and Chai, Menglei and Vicini, Delio and Zhou, Yuxiao and Xu, Yinghao and Guibas, Leonidas and Wetzstein, Gordon and Beeler, Thabo},
  title     = {{GroomLight}: Hybrid Inverse Rendering for Relightable Human Hair Appearance Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16040--16050},
}
Improving Editability in Image Generation with Layer-wise Memory: Daneul Kim,

Jaeah Lee,

Jaesik Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Daneul and Lee, Jaeah and Park, Jaesik},
  title     = {Improving Editability in Image Generation with Layer-wise Memory},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7889--7898},
}
Sea-ing in Low-light: Nisha Varghese,

A. N. Rajagopalan; [pdf] [supp]
[bibtex]
@InProceedings{Varghese_2025_CVPR,
  author    = {Varghese, Nisha and Rajagopalan, A. N.},
  title     = {Sea-ing in Low-light},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16629--16640},
}
VidTwin: Video VAE with Decoupled Structure and Dynamics: Yuchi Wang,

Junliang Guo,

Xinyi Xie,

Tianyu He,

Xu Sun,

Jiang Bian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuchi and Guo, Junliang and Xie, Xinyi and He, Tianyu and Sun, Xu and Bian, Jiang},
  title     = {{VidTwin}: Video {VAE} with Decoupled Structure and Dynamics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22922--22932},
}
CL-LoRA: Continual Low-Rank Adaptation for Rehearsal-Free Class-Incremental Learning: Jiangpeng He,

Zhihao Duan,

Fengqing Zhu; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Jiangpeng and Duan, Zhihao and Zhu, Fengqing},
  title     = {{CL-LoRA}: Continual Low-Rank Adaptation for Rehearsal-Free Class-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30534--30544},
}
Generative Modeling of Class Probability for Multi-Modal Representation Learning: JungKyoo Shin,

Bumsoo Kim,

Eunwoo Kim; [pdf] [arXiv]
[bibtex]
@InProceedings{Shin_2025_CVPR,
  author    = {Shin, JungKyoo and Kim, Bumsoo and Kim, Eunwoo},
  title     = {Generative Modeling of Class Probability for Multi-Modal Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20737--20746},
}
VisionZip: Longer is Better but Not Necessary in Vision Language Models: Senqiao Yang,

Yukang Chen,

Zhuotao Tian,

Chengyao Wang,

Jingyao Li,

Bei Yu,

Jiaya Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Senqiao and Chen, Yukang and Tian, Zhuotao and Wang, Chengyao and Li, Jingyao and Yu, Bei and Jia, Jiaya},
  title     = {{VisionZip}: Longer is Better but Not Necessary in Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19792--19802},
}
Simplification Is All You Need against Out-of-Distribution Overconfidence: Keke Tang,

Chao Hou,

Weilong Peng,

Xiang Fang,

Zhize Wu,

Yongwei Nie,

Wenping Wang,

Zhihong Tian; [pdf]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Keke and Hou, Chao and Peng, Weilong and Fang, Xiang and Wu, Zhize and Nie, Yongwei and Wang, Wenping and Tian, Zhihong},
  title     = {Simplification Is All You Need against Out-of-Distribution Overconfidence},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5030--5040},
}
SpatialDreamer: Self-supervised Stereo Video Synthesis from Monocular Input: Zhen Lv,

Yangqi Long,

Congzhentao Huang,

Cao Li,

Chengfei Lv,

Hao Ren,

Dian Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2025_CVPR,
  author    = {Lv, Zhen and Long, Yangqi and Huang, Congzhentao and Li, Cao and Lv, Chengfei and Ren, Hao and Zheng, Dian},
  title     = {{SpatialDreamer}: Self-supervised Stereo Video Synthesis from Monocular Input},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {811--821},
}
LOD-GS: Achieving Levels of Detail using Scalable Gaussian Soup: Jianxiong Shen,

Yue Qian,

Xiaohang Zhan; [pdf]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Jianxiong and Qian, Yue and Zhan, Xiaohang},
  title     = {{LOD-GS}: Achieving Levels of Detail using Scalable {Gaussian} Soup},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {671--680},
}
BlenderGym: Benchmarking Foundational Model Systems for Graphics Editing: Yunqi Gu,

Ian Huang,

Jihyeon Je,

Guandao Yang,

Leonidas Guibas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2025_CVPR,
  author    = {Gu, Yunqi and Huang, Ian and Je, Jihyeon and Yang, Guandao and Guibas, Leonidas},
  title     = {{BlenderGym}: Benchmarking Foundational Model Systems for Graphics Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18574--18583},
}
VoteFlow: Enforcing Local Rigidity in Self-Supervised Scene Flow: Yancong Lin,

Shiming Wang,

Liangliang Nan,

Julian Kooij,

Holger Caesar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Yancong and Wang, Shiming and Nan, Liangliang and Kooij, Julian and Caesar, Holger},
  title     = {{VoteFlow}: Enforcing Local Rigidity in Self-Supervised Scene Flow},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17155--17164},
}
The Devil is in Low-Level Features for Cross-Domain Few-Shot Segmentation: Yuhan Liu,

Yixiong Zou,

Yuhua Li,

Ruixuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yuhan and Zou, Yixiong and Li, Yuhua and Li, Ruixuan},
  title     = {The Devil is in Low-Level Features for Cross-Domain Few-Shot Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4618--4627},
}
Design2GarmentCode: Turning Design Concepts to Tangible Garments Through Program Synthesis: Feng Zhou,

Ruiyang Liu,

Chen Liu,

Gaofeng He,

Yong-Lu Li,

Xiaogang Jin,

Huamin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Feng and Liu, Ruiyang and Liu, Chen and He, Gaofeng and Li, Yong-Lu and Jin, Xiaogang and Wang, Huamin},
  title     = {{Design2GarmentCode}: Turning Design Concepts to Tangible Garments Through Program Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23712--23722},
}
Uncertainty Weighted Gradients for Model Calibration: Jinxu Lin,

Linwei Tao,

Minjing Dong,

Chang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Jinxu and Tao, Linwei and Dong, Minjing and Xu, Chang},
  title     = {Uncertainty Weighted Gradients for Model Calibration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15497--15507},
}
Efficient Dynamic Scene Editing via 4D Gaussian-based Static-Dynamic Separation: Joohyun Kwon,

Hanbyel Cho,

Junmo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kwon_2025_CVPR,
  author    = {Kwon, Joohyun and Cho, Hanbyel and Kim, Junmo},
  title     = {Efficient Dynamic Scene Editing via {4D} {Gaussian}-based Static-Dynamic Separation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26855--26865},
}
Unlearning through Knowledge Overwriting: Reversible Federated Unlearning via Selective Sparse Adapter: Zhengyi Zhong,

Weidong Bao,

Ji Wang,

Shuai Zhang,

Jingxuan Zhou,

Lingjuan Lyu,

Wei Yang Bryan Lim; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_CVPR,
  author    = {Zhong, Zhengyi and Bao, Weidong and Wang, Ji and Zhang, Shuai and Zhou, Jingxuan and Lyu, Lingjuan and Lim, Wei Yang Bryan},
  title     = {Unlearning through Knowledge Overwriting: Reversible Federated Unlearning via Selective Sparse Adapter},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30661--30670},
}
SocialMOIF: Multi-Order Intention Fusion for Pedestrian Trajectory Prediction: Kai Chen,

Xiaodong Zhao,

Yujie Huang,

Guoyu Fang,

Xiao Song,

Ruiping Wang,

Ziyuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Kai and Zhao, Xiaodong and Huang, Yujie and Fang, Guoyu and Song, Xiao and Wang, Ruiping and Wang, Ziyuan},
  title     = {{SocialMOIF}: Multi-Order Intention Fusion for Pedestrian Trajectory Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22465--22475},
}
FFaceNeRF: Few-shot Face Editing in Neural Radiance Fields: Kwan Yun,

Chaelin Kim,

Hangyeul Shin,

Junyong Noh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yun_2025_CVPR,
  author    = {Yun, Kwan and Kim, Chaelin and Shin, Hangyeul and Noh, Junyong},
  title     = {{FFaceNeRF}: Few-shot Face Editing in Neural Radiance Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10825--10835},
}
Discrete to Continuous: Generating Smooth Transition Poses from Sign Language Observations: Shengeng Tang,

Jiayi He,

Lechao Cheng,

Jingjing Wu,

Dan Guo,

Richang Hong; [pdf] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Shengeng and He, Jiayi and Cheng, Lechao and Wu, Jingjing and Guo, Dan and Hong, Richang},
  title     = {Discrete to Continuous: Generating Smooth Transition Poses from Sign Language Observations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3481--3491},
}
HistoFS: Non-IID Histopathologic Whole Slide Image Classification via Federated Style Transfer with RoI-Preserving: Farchan Hakim Raswa,

Chun-Shien Lu,

Jia-Ching Wang; [pdf] [supp]
[bibtex]
@InProceedings{Raswa_2025_CVPR,
  author    = {Raswa, Farchan Hakim and Lu, Chun-Shien and Wang, Jia-Ching},
  title     = {{HistoFS}: {Non-IID} Histopathologic Whole Slide Image Classification via Federated Style Transfer with {RoI}-Preserving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30251--30260},
}
Unified Medical Lesion Segmentation via Self-referring Indicator: Shijie Chang,

Xiaoqi Zhao,

Lihe Zhang,

Tiancheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2025_CVPR,
  author    = {Chang, Shijie and Zhao, Xiaoqi and Zhang, Lihe and Wang, Tiancheng},
  title     = {Unified Medical Lesion Segmentation via Self-referring Indicator},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10414--10424},
}
SGSST: Scaling Gaussian Splatting Style Transfer: Bruno Galerne,

Jianling Wang,

Lara Raad,

Jean-Michel Morel; [pdf] [supp]
[bibtex]
@InProceedings{Galerne_2025_CVPR,
  author    = {Galerne, Bruno and Wang, Jianling and Raad, Lara and Morel, Jean-Michel},
  title     = {{SGSST}: Scaling {Gaussian} Splatting Style Transfer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26535--26544},
}
Learning Bijective Surface Parameterization for Inferring Signed Distance Functions from Sparse Point Clouds with Grid Deformation: Takeshi Noda,

Chao Chen,

Junsheng Zhou,

Weiqi Zhang,

Yu-Shen Liu,

Zhizhong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Noda_2025_CVPR,
  author    = {Noda, Takeshi and Chen, Chao and Zhou, Junsheng and Zhang, Weiqi and Liu, Yu-Shen and Han, Zhizhong},
  title     = {Learning Bijective Surface Parameterization for Inferring Signed Distance Functions from Sparse Point Clouds with Grid Deformation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22139--22149},
}
Minimizing Labeled, Maximizing Unlabeled: An Image-Driven Approach for Video Instance Segmentation: Fangyun Wei,

Jinjing Zhao,

Kun Yan,

Chang Xu; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Fangyun and Zhao, Jinjing and Yan, Kun and Xu, Chang},
  title     = {Minimizing Labeled, Maximizing Unlabeled: An Image-Driven Approach for Video Instance Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19304--19314},
}
Layer- and Timestep-Adaptive Differentiable Token Compression Ratios for Efficient Diffusion Transformers: Haoran You,

Connelly Barnes,

Yuqian Zhou,

Yan Kang,

Zhenbang Du,

Wei Zhou,

Lingzhi Zhang,

Yotam Nitzan,

Xiaoyang Liu,

Zhe Lin,

Eli Shechtman,

Sohrab Amirghodsi,

Yingyan Celine Lin; [pdf] [supp]
[bibtex]
@InProceedings{You_2025_CVPR,
  author    = {You, Haoran and Barnes, Connelly and Zhou, Yuqian and Kang, Yan and Du, Zhenbang and Zhou, Wei and Zhang, Lingzhi and Nitzan, Yotam and Liu, Xiaoyang and Lin, Zhe and Shechtman, Eli and Amirghodsi, Sohrab and Lin, Yingyan Celine},
  title     = {Layer- and Timestep-Adaptive Differentiable Token Compression Ratios for Efficient Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18072--18082},
}
Zero-shot RGB-D Point Cloud Registration with Pre-trained Large Vision Model: Haobo Jiang,

Jin Xie,

Jian Yang,

Liang Yu,

Jianmin Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Haobo and Xie, Jin and Yang, Jian and Yu, Liang and Zheng, Jianmin},
  title     = {Zero-shot {RGB-D} Point Cloud Registration with Pre-trained Large Vision Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16943--16952},
}
Balancing Two Classifiers via A Simplex ETF Structure for Model Calibration: Jiani Ni,

He Zhao,

Jintong Gao,

Dandan Guo,

Hongyuan Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2025_CVPR,
  author    = {Ni, Jiani and Zhao, He and Gao, Jintong and Guo, Dandan and Zha, Hongyuan},
  title     = {Balancing Two Classifiers via A Simplex {ETF} Structure for Model Calibration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30712--30721},
}
DistinctAD: Distinctive Audio Description Generation in Contexts: Bo Fang,

Wenhao Wu,

Qiangqiang Wu,

Yuxin Song,

Antoni B. Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Bo and Wu, Wenhao and Wu, Qiangqiang and Song, Yuxin and Chan, Antoni B.},
  title     = {{DistinctAD}: Distinctive Audio Description Generation in Contexts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13571--13581},
}
DAMM-Diffusion: Learning Divergence-Aware Multi-Modal Diffusion Model for Nanoparticles Distribution Prediction: Junjie Zhou,

Shouju Wang,

Yuxia Tang,

Qi Zhu,

Daoqiang Zhang,

Wei Shao; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Junjie and Wang, Shouju and Tang, Yuxia and Zhu, Qi and Zhang, Daoqiang and Shao, Wei},
  title     = {{DAMM-Diffusion}: Learning Divergence-Aware Multi-Modal Diffusion Model for Nanoparticles Distribution Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30886--30895},
}
Unveiling Differences in Generative Models: A Scalable Differential Clustering Approach: Jingwei Zhang,

Mohammad Jalali,

Cheuk Ting Li,

Farzan Farnia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jingwei and Jalali, Mohammad and Li, Cheuk Ting and Farnia, Farzan},
  title     = {Unveiling Differences in Generative Models: A Scalable Differential Clustering Approach},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8269--8278},
}
CL-MoE: Enhancing Multimodal Large Language Model with Dual Momentum Mixture-of-Experts for Continual Visual Question Answering: Tianyu Huai,

Jie Zhou,

Xingjiao Wu,

Qin Chen,

Qingchun Bai,

Ze Zhou,

Liang He; [pdf] [supp]
[bibtex]
@InProceedings{Huai_2025_CVPR,
  author    = {Huai, Tianyu and Zhou, Jie and Wu, Xingjiao and Chen, Qin and Bai, Qingchun and Zhou, Ze and He, Liang},
  title     = {{CL-MoE}: Enhancing Multimodal Large Language Model with Dual Momentum Mixture-of-Experts for Continual Visual Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19608--19617},
}
Semantic Library Adaptation: LoRA Retrieval and Fusion for Open-Vocabulary Semantic Segmentation: Reza Qorbani,

Gianluca Villani,

Theodoros Panagiotakopoulos,

Marc Botet Colomer,

Linus Härenstam-Nielsen,

Mattia Segu,

Pier Luigi Dovesi,

Jussi Karlgren,

Daniel Cremers,

Federico Tombari,

Matteo Poggi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qorbani_2025_CVPR,
  author    = {Qorbani, Reza and Villani, Gianluca and Panagiotakopoulos, Theodoros and Colomer, Marc Botet and H{\"a}renstam-Nielsen, Linus and Segu, Mattia and Dovesi, Pier Luigi and Karlgren, Jussi and Cremers, Daniel and Tombari, Federico and Poggi, Matteo},
  title     = {Semantic Library Adaptation: {LoRA} Retrieval and Fusion for Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9804--9815},
}
PhyS-EdiT: Physics-aware Semantic Image Editing with Text Description: Ziqi Cai,

Shuchen Weng,

Yifei Xia,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Ziqi and Weng, Shuchen and Xia, Yifei and Shi, Boxin},
  title     = {{PhyS-EdiT}: Physics-aware Semantic Image Editing with Text Description},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7867--7876},
}
U-Know-DiffPAN: An Uncertainty-aware Knowledge Distillation Diffusion Framework with Details Enhancement for PAN-Sharpening: Sungpyo Kim,

Jeonghyeok Do,

Jaehyup Lee,

Munchurl Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Sungpyo and Do, Jeonghyeok and Lee, Jaehyup and Kim, Munchurl},
  title     = {{U-Know-DiffPAN}: An Uncertainty-aware Knowledge Distillation Diffusion Framework with Details Enhancement for {PAN}-Sharpening},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23069--23079},
}
SceneDiffuser++: City-Scale Traffic Simulation via a Generative World Model: Shuhan Tan,

John Lambert,

Hong Jeon,

Sakshum Kulshrestha,

Yijing Bai,

Jing Luo,

Dragomir Anguelov,

Mingxing Tan,

Chiyu Max Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2025_CVPR,
  author    = {Tan, Shuhan and Lambert, John and Jeon, Hong and Kulshrestha, Sakshum and Bai, Yijing and Luo, Jing and Anguelov, Dragomir and Tan, Mingxing and Jiang, Chiyu Max},
  title     = {{SceneDiffuser++}: City-Scale Traffic Simulation via a Generative World Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1570--1580},
}
Gaussian Splatting Feature Fields for (Privacy-Preserving) Visual Localization: Maxime Pietrantoni,

Gabriela Csurka,

Torsten Sattler; [pdf] [supp]
[bibtex]
@InProceedings{Pietrantoni_2025_CVPR,
  author    = {Pietrantoni, Maxime and Csurka, Gabriela and Sattler, Torsten},
  title     = {{Gaussian} Splatting Feature Fields for (Privacy-Preserving) Visual Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1082--1092},
}
Point Cloud Upsampling Using Conditional Diffusion Module with Adaptive Noise Suppression: Boqian Zhang,

Shen Yang,

Hao Chen,

Chao Yang,

Jing Jia,

Guang Jiang; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Boqian and Yang, Shen and Chen, Hao and Yang, Chao and Jia, Jing and Jiang, Guang},
  title     = {Point Cloud Upsampling Using Conditional Diffusion Module with Adaptive Noise Suppression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16987--16996},
}
Gazing Into Missteps: Leveraging Eye-Gaze for Unsupervised Mistake Detection in Egocentric Videos of Skilled Human Activities: Michele Mazzamuto,

Antonino Furnari,

Yoichi Sato,

Giovanni Maria Farinella; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mazzamuto_2025_CVPR,
  author    = {Mazzamuto, Michele and Furnari, Antonino and Sato, Yoichi and Farinella, Giovanni Maria},
  title     = {Gazing Into Missteps: Leveraging Eye-Gaze for Unsupervised Mistake Detection in Egocentric Videos of Skilled Human Activities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8310--8320},
}
Trajectory Mamba: Efficient Attention-Mamba Forecasting Model Based on Selective SSM: Yizhou Huang,

Yihua Cheng,

Kezhi Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Yizhou and Cheng, Yihua and Wang, Kezhi},
  title     = {Trajectory {Mamba}: Efficient Attention-{Mamba} Forecasting Model Based on Selective {SSM}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12058--12067},
}
Recover and Match: Open-Vocabulary Multi-Label Recognition through Knowledge-Constrained Optimal Transport: Hao Tan,

Zichang Tan,

Jun Li,

Ajian Liu,

Jun Wan,

Zhen Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2025_CVPR,
  author    = {Tan, Hao and Tan, Zichang and Li, Jun and Liu, Ajian and Wan, Jun and Lei, Zhen},
  title     = {Recover and Match: Open-Vocabulary Multi-Label Recognition through Knowledge-Constrained Optimal Transport},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4650--4660},
}
RelationField: Relate Anything in Radiance Fields: Sebastian Koch,

Johanna Wald,

Mirco Colosi,

Narunas Vaskevicius,

Pedro Hermosilla,

Federico Tombari,

Timo Ropinski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koch_2025_CVPR,
  author    = {Koch, Sebastian and Wald, Johanna and Colosi, Mirco and Vaskevicius, Narunas and Hermosilla, Pedro and Tombari, Federico and Ropinski, Timo},
  title     = {{RelationField}: Relate Anything in Radiance Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21706--21716},
}
DyFo: A Training-Free Dynamic Focus Visual Search for Enhancing LMMs in Fine-Grained Visual Understanding: Geng Li,

Jinglin Xu,

Yunzhen Zhao,

Yuxin Peng; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Geng and Xu, Jinglin and Zhao, Yunzhen and Peng, Yuxin},
  title     = {{DyFo}: A Training-Free Dynamic Focus Visual Search for Enhancing {LMMs} in Fine-Grained Visual Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9098--9108},
}
From Head to Tail: Towards Balanced Representation in Large Vision-Language Models through Adaptive Data Calibration: Mingyang Song,

Xiaoye Qu,

Jiawei Zhou,

Yu Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_CVPR,
  author    = {Song, Mingyang and Qu, Xiaoye and Zhou, Jiawei and Cheng, Yu},
  title     = {From Head to Tail: Towards Balanced Representation in Large Vision-Language Models through Adaptive Data Calibration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9434--9444},
}
Let Humanoids Hike! Integrative Skill Development on Complex Trails: Kwan-Yee Lin,

Stella X. Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Kwan-Yee and Yu, Stella X.},
  title     = {Let Humanoids Hike! Integrative Skill Development on Complex Trails},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22498--22507},
}
VLOGGER: Multimodal Diffusion for Embodied Avatar Synthesis: Enric Corona,

Andrei Zanfir,

Eduard Gabriel Bazavan,

Nikos Kolotouros,

Thiemo Alldieck,

Cristian Sminchisescu; [pdf] [arXiv]
[bibtex]
@InProceedings{Corona_2025_CVPR,
  author    = {Corona, Enric and Zanfir, Andrei and Bazavan, Eduard Gabriel and Kolotouros, Nikos and Alldieck, Thiemo and Sminchisescu, Cristian},
  title     = {{VLOGGER}: Multimodal Diffusion for Embodied Avatar Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15896--15908},
}
DEIM: DETR with Improved Matching for Fast Convergence: Shihua Huang,

Zhichao Lu,

Xiaodong Cun,

Yongjun Yu,

Xiao Zhou,

Xi Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Shihua and Lu, Zhichao and Cun, Xiaodong and Yu, Yongjun and Zhou, Xiao and Shen, Xi},
  title     = {{DEIM}: {DETR} with Improved Matching for Fast Convergence},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15162--15171},
}
BF-STVSR: B-Splines and Fourier---Best Friends for High Fidelity Spatial-Temporal Video Super-Resolution: Eunjin Kim,

Hyeonjin Kim,

Kyong Hwan Jin,

Jaejun Yoo; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Eunjin and Kim, Hyeonjin and Jin, Kyong Hwan and Yoo, Jaejun},
  title     = {{BF-STVSR}: {B-Splines} and {Fourier}---Best Friends for High Fidelity Spatial-Temporal Video Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28009--28018},
}
DIO: Decomposable Implicit 4D Occupancy-Flow World Model: Christopher Diehl,

Quinlan Sykora,

Ben Agro,

Thomas Gilles,

Sergio Casas,

Raquel Urtasun; [pdf] [supp]
[bibtex]
@InProceedings{Diehl_2025_CVPR,
  author    = {Diehl, Christopher and Sykora, Quinlan and Agro, Ben and Gilles, Thomas and Casas, Sergio and Urtasun, Raquel},
  title     = {{DIO}: Decomposable Implicit {4D} Occupancy-Flow World Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27456--27466},
}
SLADE: Shielding against Dual Exploits in Large Vision-Language Models: Md Zarif Hossain,

Ahmed Imteaj; [pdf] [supp]
[bibtex]
@InProceedings{Hossain_2025_CVPR,
  author    = {Hossain, Md Zarif and Imteaj, Ahmed},
  title     = {{SLADE}: Shielding against Dual Exploits in Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24244--24254},
}
Human Motion Instruction Tuning: Lei Li,

Sen Jia,

Jianhao Wang,

Zhongyu Jiang,

Feng Zhou,

Ju Dai,

Tianfang Zhang,

Zongkai Wu,

Jenq-Neng Hwang; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Lei and Jia, Sen and Wang, Jianhao and Jiang, Zhongyu and Zhou, Feng and Dai, Ju and Zhang, Tianfang and Wu, Zongkai and Hwang, Jenq-Neng},
  title     = {Human Motion Instruction Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17582--17591},
}
A Flag Decomposition for Hierarchical Datasets: Nathan Mankovich,

Ignacio Santamaria,

Gustau Camps-Valls,

Tolga Birdal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mankovich_2025_CVPR,
  author    = {Mankovich, Nathan and Santamaria, Ignacio and Camps-Valls, Gustau and Birdal, Tolga},
  title     = {A Flag Decomposition for Hierarchical Datasets},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18738--18748},
}
RCP-Bench: Benchmarking Robustness for Collaborative Perception Under Diverse Corruptions: Shihang Du,

Sanqing Qu,

Tianhang Wang,

Xudong Zhang,

Yunwei Zhu,

Jian Mao,

Fan Lu,

Qiao Lin,

Guang Chen; [pdf] [supp]
[bibtex]
@InProceedings{Du_2025_CVPR,
  author    = {Du, Shihang and Qu, Sanqing and Wang, Tianhang and Zhang, Xudong and Zhu, Yunwei and Mao, Jian and Lu, Fan and Lin, Qiao and Chen, Guang},
  title     = {{RCP-Bench}: Benchmarking Robustness for Collaborative Perception Under Diverse Corruptions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11908--11918},
}
Olympus: A Universal Task Router for Computer Vision Tasks: Yuanze Lin,

Yunsheng Li,

Dongdong Chen,

Weijian Xu,

Ronald Clark,

Philip Torr; [pdf] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Yuanze and Li, Yunsheng and Chen, Dongdong and Xu, Weijian and Clark, Ronald and Torr, Philip},
  title     = {Olympus: A Universal Task Router for Computer Vision Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14235--14246},
}
HERA: Hybrid Explicit Representation for Ultra-Realistic Head Avatars: Hongrui Cai,

Yuting Xiao,

Xuan Wang,

Jiafei Li,

Yudong Guo,

Yanbo Fan,

Shenghua Gao,

Juyong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Hongrui and Xiao, Yuting and Wang, Xuan and Li, Jiafei and Guo, Yudong and Fan, Yanbo and Gao, Shenghua and Zhang, Juyong},
  title     = {{HERA}: Hybrid Explicit Representation for Ultra-Realistic Head Avatars},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {260--270},
}
Circumventing Shortcuts in Audio-visual Deepfake Detection Datasets with Unsupervised Learning: Stefan Smeu,

Dragos-Alexandru Boldisor,

Dan Oneata,

Elisabeta Oneata; [pdf] [arXiv]
[bibtex]
@InProceedings{Smeu_2025_CVPR,
  author    = {Smeu, Stefan and Boldisor, Dragos-Alexandru and Oneata, Dan and Oneata, Elisabeta},
  title     = {Circumventing Shortcuts in Audio-visual Deepfake Detection Datasets with Unsupervised Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18815--18825},
}
CoE: Chain-of-Explanation via Automatic Visual Concept Circuit Description and Polysemanticity Quantification: Wenlong Yu,

Qilong Wang,

Chuang Liu,

Dong Li,

Qinghua Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Wenlong and Wang, Qilong and Liu, Chuang and Li, Dong and Hu, Qinghua},
  title     = {{CoE}: Chain-of-Explanation via Automatic Visual Concept Circuit Description and Polysemanticity Quantification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4364--4374},
}
Ego4o: Egocentric Human Motion Capture and Understanding from Multi-Modal Input: Jian Wang,

Rishabh Dabral,

Diogo Luvizon,

Zhe Cao,

Lingjie Liu,

Thabo Beeler,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jian and Dabral, Rishabh and Luvizon, Diogo and Cao, Zhe and Liu, Lingjie and Beeler, Thabo and Theobalt, Christian},
  title     = {Ego4o: Egocentric Human Motion Capture and Understanding from Multi-Modal Input},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22668--22679},
}
Image Over Text: Transforming Formula Recognition Evaluation with Character Detection Matching: Bin Wang,

Fan Wu,

Linke Ouyang,

Zhuangcheng Gu,

Rui Zhang,

Renqiu Xia,

Botian Shi,

Bo Zhang,

Conghui He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Bin and Wu, Fan and Ouyang, Linke and Gu, Zhuangcheng and Zhang, Rui and Xia, Renqiu and Shi, Botian and Zhang, Bo and He, Conghui},
  title     = {Image Over Text: Transforming Formula Recognition Evaluation with Character Detection Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19681--19690},
}
FreePCA: Integrating Consistency Information across Long-short Frames in Training-free Long Video Generation via Principal Component Analysis: Jiangtong Tan,

Hu Yu,

Jie Huang,

Jie Xiao,

Feng Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2025_CVPR,
  author    = {Tan, Jiangtong and Yu, Hu and Huang, Jie and Xiao, Jie and Zhao, Feng},
  title     = {{FreePCA}: Integrating Consistency Information across Long-short Frames in Training-free Long Video Generation via Principal Component Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27979--27988},
}
Hierarchical Adaptive Filtering Network for Text Image Specular Highlight Removal: Zhi Jiang,

Jingbo Hu,

Ling Zhang,

Gang Fu,

Chunxia Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Zhi and Hu, Jingbo and Zhang, Ling and Fu, Gang and Xiao, Chunxia},
  title     = {Hierarchical Adaptive Filtering Network for Text Image Specular Highlight Removal},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2408--2417},
}
Improving Semi-Supervised Semantic Segmentation with Sliced-Wasserstein Feature Alignment and Uniformity: Chen-Yi Lu,

Kasra Derakhshandeh,

Somali Chaterji; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Chen-Yi and Derakhshandeh, Kasra and Chaterji, Somali},
  title     = {Improving Semi-Supervised Semantic Segmentation with {Sliced-Wasserstein} Feature Alignment and Uniformity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20233--20243},
}
Mind the Time: Temporally-Controlled Multi-Event Video Generation: Ziyi Wu,

Aliaksandr Siarohin,

Willi Menapace,

Ivan Skorokhodov,

Yuwei Fang,

Varnith Chordia,

Igor Gilitschenski,

Sergey Tulyakov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Ziyi and Siarohin, Aliaksandr and Menapace, Willi and Skorokhodov, Ivan and Fang, Yuwei and Chordia, Varnith and Gilitschenski, Igor and Tulyakov, Sergey},
  title     = {Mind the Time: Temporally-Controlled Multi-Event Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23989--24000},
}
Learning Extremely High Density Crowds as Active Matters: Feixiang He,

Jiangbei Yue,

Jialin Zhu,

Armin Seyfried,

Dan Casas,

Julien Pettré,

He Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Feixiang and Yue, Jiangbei and Zhu, Jialin and Seyfried, Armin and Casas, Dan and Pettr{\'e}, Julien and Wang, He},
  title     = {Learning Extremely High Density Crowds as Active Matters},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {540--550},
}
Audio-Visual Semantic Graph Network for Audio-Visual Event Localization: Liang Liu,

Shuaiyong Li,

Yongqiang Zhu; [pdf]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Liang and Li, Shuaiyong and Zhu, Yongqiang},
  title     = {Audio-Visual Semantic Graph Network for Audio-Visual Event Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23957--23966},
}
3D-Mem: 3D Scene Memory for Embodied Exploration and Reasoning: Yuncong Yang,

Han Yang,

Jiachen Zhou,

Peihao Chen,

Hongxin Zhang,

Yilun Du,

Chuang Gan; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Yuncong and Yang, Han and Zhou, Jiachen and Chen, Peihao and Zhang, Hongxin and Du, Yilun and Gan, Chuang},
  title     = {{3D-Mem}: {3D} Scene Memory for Embodied Exploration and Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17294--17303},
}
EchoMimicV2: Towards Striking, Simplified, and Semi-Body Human Animation: Rang Meng,

Xingyu Zhang,

Yuming Li,

Chenguang Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Meng_2025_CVPR,
  author    = {Meng, Rang and Zhang, Xingyu and Li, Yuming and Ma, Chenguang},
  title     = {{EchoMimicV2}: Towards Striking, Simplified, and Semi-Body Human Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5489--5498},
}
Navigation World Models: Amir Bar,

Gaoyue Zhou,

Danny Tran,

Trevor Darrell,

Yann LeCun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bar_2025_CVPR,
  author    = {Bar, Amir and Zhou, Gaoyue and Tran, Danny and Darrell, Trevor and LeCun, Yann},
  title     = {Navigation World Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15791--15801},
}
Video Motion Transfer with Diffusion Transformers: Alexander Pondaven,

Aliaksandr Siarohin,

Sergey Tulyakov,

Philip Torr,

Fabio Pizzati; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pondaven_2025_CVPR,
  author    = {Pondaven, Alexander and Siarohin, Aliaksandr and Tulyakov, Sergey and Torr, Philip and Pizzati, Fabio},
  title     = {Video Motion Transfer with Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22911--22921},
}
Gaussian Splatting for Efficient Satellite Image Photogrammetry: Luca Savant Aira,

Gabriele Facciolo,

Thibaud Ehret; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Aira_2025_CVPR,
  author    = {Aira, Luca Savant and Facciolo, Gabriele and Ehret, Thibaud},
  title     = {Gaussian Splatting for Efficient Satellite Image Photogrammetry},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5959--5969},
}
Unified Reconstruction of Static and Dynamic Scenes from Events: Qiyao Gao,

Peiqi Duan,

Hanyue Lou,

Minggui Teng,

Ziqi Cai,

Xu Chen,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Qiyao and Duan, Peiqi and Lou, Hanyue and Teng, Minggui and Cai, Ziqi and Chen, Xu and Shi, Boxin},
  title     = {Unified Reconstruction of Static and Dynamic Scenes from Events},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27914--27923},
}
Automatic Spectral Calibration of Hyperspectral Images: Method, Dataset and Benchmark: Zhuoran Du,

Shaodi You,

Cheng Cheng,

Shikui Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2025_CVPR,
  author    = {Du, Zhuoran and You, Shaodi and Cheng, Cheng and Wei, Shikui},
  title     = {Automatic Spectral Calibration of Hyperspectral Images: Method, Dataset and Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28081--28090},
}
Conformal Prediction and MLLM aided Uncertainty Quantification in Scene Graph Generation: Sayak Nag,

Udita Ghosh,

Calvin-Khang Ta,

Sarosij Bose,

Jiachen Li,

Amit K. Roy-Chowdhury; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nag_2025_CVPR,
  author    = {Nag, Sayak and Ghosh, Udita and Ta, Calvin-Khang and Bose, Sarosij and Li, Jiachen and Roy-Chowdhury, Amit K.},
  title     = {Conformal Prediction and {MLLM} aided Uncertainty Quantification in Scene Graph Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11676--11686},
}
Point-to-Region Loss for Semi-Supervised Point-Based Crowd Counting: Wei Lin,

Chenyang Zhao,

Antoni B. Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Wei and Zhao, Chenyang and Chan, Antoni B.},
  title     = {Point-to-Region Loss for Semi-Supervised Point-Based Crowd Counting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29363--29373},
}
Reconstructing Close Human Interaction with Appearance and Proxemics Reasoning: Buzhen Huang,

Chen Li,

Chongyang Xu,

Dongyue Lu,

Jinnan Chen,

Yangang Wang,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Buzhen and Li, Chen and Xu, Chongyang and Lu, Dongyue and Chen, Jinnan and Wang, Yangang and Lee, Gim Hee},
  title     = {Reconstructing Close Human Interaction with Appearance and Proxemics Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17475--17485},
}
Towards Improved Text-Aligned Codebook Learning: Multi-Hierarchical Codebook-Text Alignment with Long Text: Guotao Liang,

Baoquan Zhang,

Zhiyuan Wen,

Junteng Zhao,

Yunming Ye,

Kola Ye,

Yao He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Guotao and Zhang, Baoquan and Wen, Zhiyuan and Zhao, Junteng and Ye, Yunming and Ye, Kola and He, Yao},
  title     = {Towards Improved Text-Aligned Codebook Learning: Multi-Hierarchical Codebook-Text Alignment with Long Text},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4060--4069},
}
Parallel Sequence Modeling via Generalized Spatial Propagation Network: Hongjun Wang,

Wonmin Byeon,

Jiarui Xu,

Jinwei Gu,

Ka Chun Cheung,

Xiaolong Wang,

Kai Han,

Jan Kautz,

Sifei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Hongjun and Byeon, Wonmin and Xu, Jiarui and Gu, Jinwei and Cheung, Ka Chun and Wang, Xiaolong and Han, Kai and Kautz, Jan and Liu, Sifei},
  title     = {Parallel Sequence Modeling via Generalized Spatial Propagation Network},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4473--4483},
}
Scenario Dreamer: Vectorized Latent Diffusion for Generating Driving Simulation Environments: Luke Rowe,

Roger Girgis,

Anthony Gosselin,

Liam Paull,

Christopher Pal,

Felix Heide; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rowe_2025_CVPR,
  author    = {Rowe, Luke and Girgis, Roger and Gosselin, Anthony and Paull, Liam and Pal, Christopher and Heide, Felix},
  title     = {{Scenario Dreamer}: Vectorized Latent Diffusion for Generating Driving Simulation Environments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17207--17218},
}
Poly-Autoregressive Prediction for Modeling Interactions: Neerja Thakkar,

Tara Sadjadpour,

Jathushan Rajasegeran,

Shiry Ginosar,

Jitendra Malik; [pdf] [supp]
[bibtex]
@InProceedings{Thakkar_2025_CVPR,
  author    = {Thakkar, Neerja and Sadjadpour, Tara and Rajasegeran, Jathushan and Ginosar, Shiry and Malik, Jitendra},
  title     = {Poly-Autoregressive Prediction for Modeling Interactions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12402--12412},
}
NADER: Neural Architecture Design via Multi-Agent Collaboration: Zekang Yang,

Wang Zeng,

Sheng Jin,

Chen Qian,

Ping Luo,

Wentao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Zekang and Zeng, Wang and Jin, Sheng and Qian, Chen and Luo, Ping and Liu, Wentao},
  title     = {{NADER}: Neural Architecture Design via Multi-Agent Collaboration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4452--4461},
}
Move-in-2D: 2D-Conditioned Human Motion Generation: Hsin-Ping Huang,

Yang Zhou,

Jui-Hsien Wang,

Difan Liu,

Feng Liu,

Ming-Hsuan Yang,

Zhan Xu; [pdf]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Hsin-Ping and Zhou, Yang and Wang, Jui-Hsien and Liu, Difan and Liu, Feng and Yang, Ming-Hsuan and Xu, Zhan},
  title     = {{Move-in-2D}: {2D-Conditioned} Human Motion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22766--22775},
}
PoseBH: Prototypical Multi-Dataset Training Beyond Human Pose Estimation: Uyoung Jeong,

Jonathan Freer,

Seungryul Baek,

Hyung Jin Chang,

Kwang In Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_CVPR,
  author    = {Jeong, Uyoung and Freer, Jonathan and Baek, Seungryul and Chang, Hyung Jin and Kim, Kwang In},
  title     = {{PoseBH}: Prototypical Multi-Dataset Training Beyond Human Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12278--12288},
}
MATCHA: Towards Matching Anything: Fei Xue,

Sven Elflein,

Laura Leal-Taixé,

Qunjie Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Xue_2025_CVPR,
  author    = {Xue, Fei and Elflein, Sven and Leal-Taix{\'e}, Laura and Zhou, Qunjie},
  title     = {{MATCHA}: Towards Matching Anything},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27081--27091},
}
CTRL-D: Controllable Dynamic 3D Scene Editing with Personalized 2D Diffusion: Kai He,

Chin-Hsuan Wu,

Igor Gilitschenski; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Kai and Wu, Chin-Hsuan and Gilitschenski, Igor},
  title     = {{CTRL-D}: Controllable Dynamic {3D} Scene Editing with Personalized {2D} Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26630--26640},
}
Separation of Powers: On Segregating Knowledge from Observation in LLM-enabled Knowledge-based Visual Question Answering: Zhen Yang,

Zhuo Tao,

Qi Chen,

Liang Li,

Yuankai Qi,

Anton van den Hengel,

Qingming Huang; [pdf]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Zhen and Tao, Zhuo and Chen, Qi and Li, Liang and Qi, Yuankai and van den Hengel, Anton and Huang, Qingming},
  title     = {Separation of Powers: On Segregating Knowledge from Observation in {LLM-enabled} Knowledge-based Visual Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24753--24762},
}
Decision SpikeFormer: Spike-Driven Transformer for Decision Making: Wei Huang,

Qinying Gu,

Nanyang Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Wei and Gu, Qinying and Ye, Nanyang},
  title     = {Decision {SpikeFormer}: Spike-Driven Transformer for Decision Making},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19241--19250},
}
SF2T: Self-supervised Fragment Finetuning of Video-LLMs for Fine-Grained Understanding: Yangliu Hu,

Zikai Song,

Na Feng,

Yawei Luo,

Junqing Yu,

Yi-Ping Phoebe Chen,

Wei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Yangliu and Song, Zikai and Feng, Na and Luo, Yawei and Yu, Junqing and Chen, Yi-Ping Phoebe and Yang, Wei},
  title     = {{SF2T}: Self-supervised Fragment Finetuning of {Video-LLMs} for Fine-Grained Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29108--29117},
}
Theory-Inspired Deep Multi-View Multi-Label Learning with Incomplete Views and Noisy Labels: Quanjiang Li,

Tingjin Luo,

Jiahui Liao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Quanjiang and Luo, Tingjin and Liao, Jiahui},
  title     = {Theory-Inspired Deep Multi-View Multi-Label Learning with Incomplete Views and Noisy Labels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20706--20715},
}
Fitted Neural Lossless Image Compression: Zhe Zhang,

Zhenzhong Chen,

Shan Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhe and Chen, Zhenzhong and Liu, Shan},
  title     = {Fitted Neural Lossless Image Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23249--23258},
}
Fortifying Federated Learning Towards Trustworthiness via Auditable Data Valuation and Verifiable Client Contribution: K Naveen Kumar,

Ranjeet Ranjan Jha,

C Krishna Mohan,

Ravindra Babu Tallamraju; [pdf] [supp]
[bibtex]
@InProceedings{Kumar_2025_CVPR,
  author    = {Kumar, K Naveen and Jha, Ranjeet Ranjan and Mohan, C Krishna and Tallamraju, Ravindra Babu},
  title     = {Fortifying Federated Learning Towards Trustworthiness via Auditable Data Valuation and Verifiable Client Contribution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4999--5009},
}
EMOE: Modality-Specific Enhanced Dynamic Emotion Experts: Yiyang Fang,

Wenke Huang,

Guancheng Wan,

Kehua Su,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Yiyang and Huang, Wenke and Wan, Guancheng and Su, Kehua and Ye, Mang},
  title     = {{EMOE}: Modality-Specific Enhanced Dynamic Emotion Experts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14314--14324},
}
JarvisIR: Elevating Autonomous Driving Perception with Intelligent Image Restoration: Yunlong Lin,

Zixu Lin,

Haoyu Chen,

Panwang Pan,

Chenxin Li,

Sixiang Chen,

Kairun Wen,

Yeying Jin,

Wenbo Li,

Xinghao Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Yunlong and Lin, Zixu and Chen, Haoyu and Pan, Panwang and Li, Chenxin and Chen, Sixiang and Wen, Kairun and Jin, Yeying and Li, Wenbo and Ding, Xinghao},
  title     = {{JarvisIR}: Elevating Autonomous Driving Perception with Intelligent Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22369--22380},
}
UniPre3D: Unified Pre-training of 3D Point Cloud Models with Cross-Modal Gaussian Splatting: Ziyi Wang,

Yanran Zhang,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Ziyi and Zhang, Yanran and Zhou, Jie and Lu, Jiwen},
  title     = {{UniPre3D}: Unified Pre-training of {3D} Point Cloud Models with Cross-Modal {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1319--1329},
}
Charm: The Missing Piece in ViT Fine-Tuning for Image Aesthetic Assessment: Fatemeh Behrad,

Tinne Tuytelaars,

Johan Wagemans; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Behrad_2025_CVPR,
  author    = {Behrad, Fatemeh and Tuytelaars, Tinne and Wagemans, Johan},
  title     = {Charm: The Missing Piece in {ViT} Fine-Tuning for Image Aesthetic Assessment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7815--7824},
}
F-LMM: Grounding Frozen Large Multimodal Models: Size Wu,

Sheng Jin,

Wenwei Zhang,

Lumin Xu,

Wentao Liu,

Wei Li,

Chen Change Loy; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Size and Jin, Sheng and Zhang, Wenwei and Xu, Lumin and Liu, Wentao and Li, Wei and Loy, Chen Change},
  title     = {{F-LMM}: Grounding Frozen Large Multimodal Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24710--24721},
}
EntityErasure: Erasing Entity Cleanly via Amodal Entity Segmentation and Completion: Yixing Zhu,

Qing Zhang,

Yitong Wang,

Yongwei Nie,

Wei-Shi Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Yixing and Zhang, Qing and Wang, Yitong and Nie, Yongwei and Zheng, Wei-Shi},
  title     = {{EntityErasure}: Erasing Entity Cleanly via Amodal Entity Segmentation and Completion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28274--28283},
}
Generative Video Propagation: Shaoteng Liu,

Tianyu Wang,

Jui-Hsien Wang,

Qing Liu,

Zhifei Zhang,

Joon-Young Lee,

Yijun Li,

Bei Yu,

Zhe Lin,

Soo Ye Kim,

Jiaya Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Shaoteng and Wang, Tianyu and Wang, Jui-Hsien and Liu, Qing and Zhang, Zhifei and Lee, Joon-Young and Li, Yijun and Yu, Bei and Lin, Zhe and Kim, Soo Ye and Jia, Jiaya},
  title     = {Generative Video Propagation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17712--17722},
}
From Multimodal LLMs to Generalist Embodied Agents: Methods and Lessons: Andrew Szot,

Bogdan Mazoure,

Omar Attia,

Aleksei Timofeev,

Harsh Agrawal,

Devon Hjelm,

Zhe Gan,

Zsolt Kira,

Alexander Toshev; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Szot_2025_CVPR,
  author    = {Szot, Andrew and Mazoure, Bogdan and Attia, Omar and Timofeev, Aleksei and Agrawal, Harsh and Hjelm, Devon and Gan, Zhe and Kira, Zsolt and Toshev, Alexander},
  title     = {From Multimodal {LLMs} to Generalist Embodied Agents: Methods and Lessons},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10644--10655},
}
Mosaic3D: Foundation Dataset and Model for Open-Vocabulary 3D Segmentation: Junha Lee,

Chunghyun Park,

Jaesung Choe,

Yu-Chiang Frank Wang,

Jan Kautz,

Minsu Cho,

Chris Choy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Junha and Park, Chunghyun and Choe, Jaesung and Wang, Yu-Chiang Frank and Kautz, Jan and Cho, Minsu and Choy, Chris},
  title     = {{Mosaic3D}: Foundation Dataset and Model for Open-Vocabulary {3D} Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14089--14101},
}
T-CIL: Temperature Scaling using Adversarial Perturbation for Calibration in Class-Incremental Learning: Seong-Hyeon Hwang,

Minsu Kim,

Steven Euijong Whang; [pdf] [supp]
[bibtex]
@InProceedings{Hwang_2025_CVPR,
  author    = {Hwang, Seong-Hyeon and Kim, Minsu and Whang, Steven Euijong},
  title     = {{T-CIL}: Temperature Scaling using Adversarial Perturbation for Calibration in Class-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15339--15348},
}
LoRA Subtraction for Drift-Resistant Space in Exemplar-Free Continual Learning: Xuan Liu,

Xiaobin Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xuan and Chang, Xiaobin},
  title     = {{LoRA} Subtraction for Drift-Resistant Space in Exemplar-Free Continual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15308--15318},
}
Joint Out-of-Distribution Filtering and Data Discovery Active Learning: Sebastian Schmidt,

Leonard Schenk,

Leo Schwinn,

Stephan Günnemann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schmidt_2025_CVPR,
  author    = {Schmidt, Sebastian and Schenk, Leonard and Schwinn, Leo and G{\"u}nnemann, Stephan},
  title     = {Joint Out-of-Distribution Filtering and Data Discovery Active Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25677--25687},
}
AniMer: Animal Pose and Shape Estimation Using Family Aware Transformer: Jin Lyu,

Tianyi Zhu,

Yi Gu,

Li Lin,

Pujin Cheng,

Yebin Liu,

Xiaoying Tang,

Liang An; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lyu_2025_CVPR,
  author    = {Lyu, Jin and Zhu, Tianyi and Gu, Yi and Lin, Li and Cheng, Pujin and Liu, Yebin and Tang, Xiaoying and An, Liang},
  title     = {{AniMer}: Animal Pose and Shape Estimation Using Family Aware Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17486--17496},
}
Co-op: Correspondence-based Novel Object Pose Estimation: Sungphill Moon,

Hyeontae Son,

Dongcheol Hur,

Sangwook Kim; [pdf] [supp]
[bibtex]
@InProceedings{Moon_2025_CVPR,
  author    = {Moon, Sungphill and Son, Hyeontae and Hur, Dongcheol and Kim, Sangwook},
  title     = {Co-op: Correspondence-based Novel Object Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11622--11632},
}
Finding Local Diffusion Schrodinger Bridge using Kolmogorov-Arnold Network: Xingyu Qiu,

Mengying Yang,

Xinghua Ma,

Fanding Li,

Dong Liang,

Gongning Luo,

Wei Wang,

Kuanquan Wang,

Shuo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2025_CVPR,
  author    = {Qiu, Xingyu and Yang, Mengying and Ma, Xinghua and Li, Fanding and Liang, Dong and Luo, Gongning and Wang, Wei and Wang, Kuanquan and Li, Shuo},
  title     = {Finding Local Diffusion {Schrodinger} Bridge using {Kolmogorov-Arnold} Network},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23227--23236},
}
CorrBEV: Multi-View 3D Object Detection by Correlation Learning with Multi-modal Prototypes: Ziteng Xue,

Mingzhe Guo,

Heng Fan,

Shihui Zhang,

Zhipeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xue_2025_CVPR,
  author    = {Xue, Ziteng and Guo, Mingzhe and Fan, Heng and Zhang, Shihui and Zhang, Zhipeng},
  title     = {{CorrBEV}: Multi-View {3D} Object Detection by Correlation Learning with Multi-modal Prototypes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27413--27423},
}
Completion as Enhancement: A Degradation-Aware Selective Image Guided Network for Depth Completion: Zhiqiang Yan,

Zhengxue Wang,

Kun Wang,

Jun Li,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Zhiqiang and Wang, Zhengxue and Wang, Kun and Li, Jun and Yang, Jian},
  title     = {Completion as Enhancement: A Degradation-Aware Selective Image Guided Network for Depth Completion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26943--26953},
}
CATANet: Efficient Content-Aware Token Aggregation for Lightweight Image Super-Resolution: Xin Liu,

Jie Liu,

Jie Tang,

Gangshan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xin and Liu, Jie and Tang, Jie and Wu, Gangshan},
  title     = {{CATANet}: Efficient Content-Aware Token Aggregation for Lightweight Image Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17902--17912},
}
SeeGround: See and Ground for Zero-Shot Open-Vocabulary 3D Visual Grounding: Rong Li,

Shijie Li,

Lingdong Kong,

Xulei Yang,

Junwei Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Rong and Li, Shijie and Kong, Lingdong and Yang, Xulei and Liang, Junwei},
  title     = {{SeeGround}: See and Ground for Zero-Shot Open-Vocabulary {3D} Visual Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3707--3717},
}
RayFlow: Instance-Aware Diffusion Acceleration via Adaptive Flow Trajectories: Huiyang Shao,

Xin Xia,

Yuhong Yang,

Yuxi Ren,

Xing Wang,

Xuefeng Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2025_CVPR,
  author    = {Shao, Huiyang and Xia, Xin and Yang, Yuhong and Ren, Yuxi and Wang, Xing and Xiao, Xuefeng},
  title     = {{RayFlow}: Instance-Aware Diffusion Acceleration via Adaptive Flow Trajectories},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18113--18123},
}
Linear Attention Modeling for Learned Image Compression: Donghui Feng,

Zhengxue Cheng,

Shen Wang,

Ronghua Wu,

Hongwei Hu,

Guo Lu,

Li Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Donghui and Cheng, Zhengxue and Wang, Shen and Wu, Ronghua and Hu, Hongwei and Lu, Guo and Song, Li},
  title     = {Linear Attention Modeling for Learned Image Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7623--7632},
}
Around the World in 80 Timesteps: A Generative Approach to Global Visual Geolocation: Nicolas Dufour,

Vicky Kalogeiton,

David Picard,

Loic Landrieu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dufour_2025_CVPR,
  author    = {Dufour, Nicolas and Kalogeiton, Vicky and Picard, David and Landrieu, Loic},
  title     = {Around the World in 80 Timesteps: A Generative Approach to Global Visual Geolocation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23016--23026},
}
Asynchronous Collaborative Graph Representation for Frames and Events: Dianze Li,

Jianing Li,

Xu Liu,

Xiaopeng Fan,

Yonghong Tian; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Dianze and Li, Jianing and Liu, Xu and Fan, Xiaopeng and Tian, Yonghong},
  title     = {Asynchronous Collaborative Graph Representation for Frames and Events},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1655--1666},
}
Real-time High-fidelity Gaussian Human Avatars with Position-based Interpolation of Spatially Distributed MLPs: Youyi Zhan,

Tianjia Shao,

Yin Yang,

Kun Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2025_CVPR,
  author    = {Zhan, Youyi and Shao, Tianjia and Yang, Yin and Zhou, Kun},
  title     = {Real-time High-fidelity {Gaussian} Human Avatars with Position-based Interpolation of Spatially Distributed {MLPs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26297--26307},
}
ReconDreamer: Crafting World Models for Driving Scene Reconstruction via Online Restoration: Chaojun Ni,

Guosheng Zhao,

Xiaofeng Wang,

Zheng Zhu,

Wenkang Qin,

Guan Huang,

Chen Liu,

Yuyin Chen,

Yida Wang,

Xueyang Zhang,

Yifei Zhan,

Kun Zhan,

Peng Jia,

Xianpeng Lang,

Xingang Wang,

Wenjun Mei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2025_CVPR,
  author    = {Ni, Chaojun and Zhao, Guosheng and Wang, Xiaofeng and Zhu, Zheng and Qin, Wenkang and Huang, Guan and Liu, Chen and Chen, Yuyin and Wang, Yida and Zhang, Xueyang and Zhan, Yifei and Zhan, Kun and Jia, Peng and Lang, Xianpeng and Wang, Xingang and Mei, Wenjun},
  title     = {{ReconDreamer}: Crafting World Models for Driving Scene Reconstruction via Online Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1559--1569},
}
RoboSense: Large-scale Dataset and Benchmark for Egocentric Robot Perception and Navigation in Crowded and Unstructured Environments: Haisheng Su,

Feixiang Song,

Cong Ma,

Wei Wu,

Junchi Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2025_CVPR,
  author    = {Su, Haisheng and Song, Feixiang and Ma, Cong and Wu, Wei and Yan, Junchi},
  title     = {{RoboSense}: Large-scale Dataset and Benchmark for Egocentric Robot Perception and Navigation in Crowded and Unstructured Environments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27446--27455},
}
Self-Supervised Large Scale Point Cloud Completion for Archaeological Site Restoration: Aocheng Li,

James R. Zimmer-Dauphinee,

Rajesh Kalyanam,

Ian Lindsay,

Parker VanValkenburgh,

Steven Wernke,

Daniel Aliaga; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Aocheng and Zimmer-Dauphinee, James R. and Kalyanam, Rajesh and Lindsay, Ian and VanValkenburgh, Parker and Wernke, Steven and Aliaga, Daniel},
  title     = {Self-Supervised Large Scale Point Cloud Completion for Archaeological Site Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11759--11768},
}
Chain of Attack: On the Robustness of Vision-Language Models Against Transfer-Based Adversarial Attacks: Peng Xie,

Yequan Bie,

Jianda Mao,

Yangqiu Song,

Yang Wang,

Hao Chen,

Kani Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Peng and Bie, Yequan and Mao, Jianda and Song, Yangqiu and Wang, Yang and Chen, Hao and Chen, Kani},
  title     = {Chain of Attack: On the Robustness of Vision-Language Models Against Transfer-Based Adversarial Attacks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14679--14689},
}
Rate-In: Information-Driven Adaptive Dropout Rates for Improved Inference-Time Uncertainty Estimation: Tal Zeevi,

Ravid Shwartz-Ziv,

Yann LeCun,

Lawrence H. Staib,

John A. Onofrey; [pdf] [supp]
[bibtex]
@InProceedings{Zeevi_2025_CVPR,
  author    = {Zeevi, Tal and Shwartz-Ziv, Ravid and LeCun, Yann and Staib, Lawrence H. and Onofrey, John A.},
  title     = {{Rate-In}: Information-Driven Adaptive Dropout Rates for Improved Inference-Time Uncertainty Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20757--20766},
}
Thin-Shell-SfT: Fine-Grained Monocular Non-rigid 3D Surface Tracking with Neural Deformation Fields: Navami Kairanda,

Marc Habermann,

Shanthika Naik,

Christian Theobalt,

Vladislav Golyanik; [pdf] [supp]
[bibtex]
@InProceedings{Kairanda_2025_CVPR,
  author    = {Kairanda, Navami and Habermann, Marc and Naik, Shanthika and Theobalt, Christian and Golyanik, Vladislav},
  title     = {{Thin-Shell-SfT}: Fine-Grained Monocular Non-rigid {3D} Surface Tracking with Neural Deformation Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11373--11383},
}
DeCLIP: Decoupled Learning for Open-Vocabulary Dense Perception: Junjie Wang,

Bin Chen,

Yulin Li,

Bin Kang,

Yichi Chen,

Zhuotao Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Junjie and Chen, Bin and Li, Yulin and Kang, Bin and Chen, Yichi and Tian, Zhuotao},
  title     = {{DeCLIP}: Decoupled Learning for Open-Vocabulary Dense Perception},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14824--14834},
}
SocialGesture: Delving into Multi-person Gesture Understanding: Xu Cao,

Pranav Virupaksha,

Wenqi Jia,

Bolin Lai,

Fiona Ryan,

Sangmin Lee,

James M. Rehg; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Xu and Virupaksha, Pranav and Jia, Wenqi and Lai, Bolin and Ryan, Fiona and Lee, Sangmin and Rehg, James M.},
  title     = {{SocialGesture}: Delving into Multi-person Gesture Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19509--19519},
}
GenFusion: Closing the Loop between Reconstruction and Generation via Videos: Sibo Wu,

Congrong Xu,

Binbin Huang,

Andreas Geiger,

Anpei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Sibo and Xu, Congrong and Huang, Binbin and Geiger, Andreas and Chen, Anpei},
  title     = {{GenFusion}: Closing the Loop between Reconstruction and Generation via Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6078--6088},
}
The PanAf-FGBG Dataset: Understanding the Impact of Backgrounds in Wildlife Behaviour Recognition: Otto Brookes,

Maksim Kukushkin,

Majid Mirmehdi,

Colleen Stephens,

Paula Dieguez,

Thurston C. Hicks,

Sorrel Jones,

Kevin Lee,

Maureen S. McCarthy,

Amelia Meier,

Emmanuelle Normand,

Erin G. Wessling,

Roman M. Wittig,

Kevin Langergraber,

Klaus Zuberbühler,

Lukas Boesch,

Thomas Schmid,

Mimi Arandjelovic,

Hjalmar Kühl,

Tilo Burghardt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Brookes_2025_CVPR,
  author    = {Brookes, Otto and Kukushkin, Maksim and Mirmehdi, Majid and Stephens, Colleen and Dieguez, Paula and Hicks, Thurston C. and Jones, Sorrel and Lee, Kevin and McCarthy, Maureen S. and Meier, Amelia and Normand, Emmanuelle and Wessling, Erin G. and Wittig, Roman M. and Langergraber, Kevin and Zuberb{\"u}hler, Klaus and Boesch, Lukas and Schmid, Thomas and Arandjelovic, Mimi and K{\"u}hl, Hjalmar and Burghardt, Tilo},
  title     = {The {PanAf-FGBG} Dataset: Understanding the Impact of Backgrounds in Wildlife Behaviour Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5433--5443},
}
Multi-modal Topology-embedded Graph Learning for Spatially Resolved Genes Prediction from Pathology Images with Prior Gene Similarity Information: Hang Shi,

Changxi Chi,

Peng Wan,

Daoqiang Zhang,

Wei Shao; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Hang and Chi, Changxi and Wan, Peng and Zhang, Daoqiang and Shao, Wei},
  title     = {Multi-modal Topology-embedded Graph Learning for Spatially Resolved Genes Prediction from Pathology Images with Prior Gene Similarity Information},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20810--20819},
}
Question-Aware Gaussian Experts for Audio-Visual Question Answering: Hongyeob Kim,

Inyoung Jung,

Dayoon Suh,

Youjia Zhang,

Sangmin Lee,

Sungeun Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Hongyeob and Jung, Inyoung and Suh, Dayoon and Zhang, Youjia and Lee, Sangmin and Hong, Sungeun},
  title     = {Question-Aware {Gaussian} Experts for Audio-Visual Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13681--13690},
}
Sonic: Shifting Focus to Global Audio Perception in Portrait Animation: Xiaozhong Ji,

Xiaobin Hu,

Zhihong Xu,

Junwei Zhu,

Chuming Lin,

Qingdong He,

Jiangning Zhang,

Donghao Luo,

Yi Chen,

Qin Lin,

Qinglin Lu,

Chengjie Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Ji_2025_CVPR,
  author    = {Ji, Xiaozhong and Hu, Xiaobin and Xu, Zhihong and Zhu, Junwei and Lin, Chuming and He, Qingdong and Zhang, Jiangning and Luo, Donghao and Chen, Yi and Lin, Qin and Lu, Qinglin and Wang, Chengjie},
  title     = {Sonic: Shifting Focus to Global Audio Perception in Portrait Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {193--203},
}
Multitwine: Multi-Object Compositing with Text and Layout Control: Gemma Canet Tarrés,

Zhe Lin,

Zhifei Zhang,

He Zhang,

Andrew Gilbert,

John Collomosse,

Soo Ye Kim; [pdf] [supp]
[bibtex]
@InProceedings{Tarres_2025_CVPR,
  author    = {Tarr{\'e}s, Gemma Canet and Lin, Zhe and Zhang, Zhifei and Zhang, He and Gilbert, Andrew and Collomosse, John and Kim, Soo Ye},
  title     = {Multitwine: Multi-Object Compositing with Text and Layout Control},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8094--8104},
}
DEFOM-Stereo: Depth Foundation Model Based Stereo Matching: Hualie Jiang,

Zhiqiang Lou,

Laiyan Ding,

Rui Xu,

Minglang Tan,

Wenjie Jiang,

Rui Huang; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Hualie and Lou, Zhiqiang and Ding, Laiyan and Xu, Rui and Tan, Minglang and Jiang, Wenjie and Huang, Rui},
  title     = {{DEFOM-Stereo}: Depth Foundation Model Based Stereo Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21857--21867},
}
Adaptive Rectangular Convolution for Remote Sensing Pansharpening: Xueyang Wang,

Zhixin Zheng,

Jiandong Shao,

Yule Duan,

Liang-Jian Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xueyang and Zheng, Zhixin and Shao, Jiandong and Duan, Yule and Deng, Liang-Jian},
  title     = {Adaptive Rectangular Convolution for Remote Sensing Pansharpening},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17872--17881},
}
Video Depth without Video Models: Bingxin Ke,

Dominik Narnhofer,

Shengyu Huang,

Lei Ke,

Torben Peters,

Katerina Fragkiadaki,

Anton Obukhov,

Konrad Schindler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2025_CVPR,
  author    = {Ke, Bingxin and Narnhofer, Dominik and Huang, Shengyu and Ke, Lei and Peters, Torben and Fragkiadaki, Katerina and Obukhov, Anton and Schindler, Konrad},
  title     = {Video Depth without Video Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7233--7243},
}
PointLoRA: Low-Rank Adaptation with Token Selection for Point Cloud Learning: Song Wang,

Xiaolu Liu,

Lingdong Kong,

Jianyun Xu,

Chunyong Hu,

Gongfan Fang,

Wentong Li,

Jianke Zhu,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Song and Liu, Xiaolu and Kong, Lingdong and Xu, Jianyun and Hu, Chunyong and Fang, Gongfan and Li, Wentong and Zhu, Jianke and Wang, Xinchao},
  title     = {{PointLoRA}: Low-Rank Adaptation with Token Selection for Point Cloud Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6605--6615},
}
HumanRig: Learning Automatic Rigging for Humanoid Character in a Large Scale Dataset: Zedong Chu,

Feng Xiong,

Meiduo Liu,

Jinzhi Zhang,

Mingqi Shao,

Zhaoxu Sun,

Di Wang,

Mu Xu; [pdf] [arXiv]
[bibtex]
@InProceedings{Chu_2025_CVPR,
  author    = {Chu, Zedong and Xiong, Feng and Liu, Meiduo and Zhang, Jinzhi and Shao, Mingqi and Sun, Zhaoxu and Wang, Di and Xu, Mu},
  title     = {{HumanRig}: Learning Automatic Rigging for Humanoid Character in a Large Scale Dataset},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {304--313},
}
GaussHDR: High Dynamic Range Gaussian Splatting via Learning Unified 3D and 2D Local Tone Mapping: Jinfeng Liu,

Lingtong Kong,

Bo Li,

Dan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jinfeng and Kong, Lingtong and Li, Bo and Xu, Dan},
  title     = {{GaussHDR}: High Dynamic Range {Gaussian} Splatting via Learning Unified {3D} and {2D} Local Tone Mapping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5991--6000},
}
UIBDiffusion: Universal Imperceptible Backdoor Attack for Diffusion Models: Yuning Han,

Bingyin Zhao,

Rui Chu,

Feng Luo,

Biplab Sikdar,

Yingjie Lao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Yuning and Zhao, Bingyin and Chu, Rui and Luo, Feng and Sikdar, Biplab and Lao, Yingjie},
  title     = {{UIBDiffusion}: Universal Imperceptible Backdoor Attack for Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19186--19196},
}
DiskVPS: Vanishing Point Detector via Hough Transform in a Disk Region: Jianping Wu; [pdf]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Jianping},
  title     = {{DiskVPS}: Vanishing Point Detector via {Hough} Transform in a Disk Region},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27049--27058},
}
Seeing Far and Clearly: Mitigating Hallucinations in MLLMs with Attention Causal Decoding: Feilong Tang,

Chengzhi Liu,

Zhongxing Xu,

Ming Hu,

Zile Huang,

Haochen Xue,

Ziyang Chen,

Zelin Peng,

Zhiwei Yang,

Sijin Zhou,

Wenxue Li,

Yulong Li,

Wenxuan Song,

Shiyan Su,

Wei Feng,

Jionglong Su,

Mingquan Lin,

Yifan Peng,

Xuelian Cheng,

Imran Razzak,

Zongyuan Ge; [pdf] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Feilong and Liu, Chengzhi and Xu, Zhongxing and Hu, Ming and Huang, Zile and Xue, Haochen and Chen, Ziyang and Peng, Zelin and Yang, Zhiwei and Zhou, Sijin and Li, Wenxue and Li, Yulong and Song, Wenxuan and Su, Shiyan and Feng, Wei and Su, Jionglong and Lin, Mingquan and Peng, Yifan and Cheng, Xuelian and Razzak, Imran and Ge, Zongyuan},
  title     = {Seeing Far and Clearly: Mitigating Hallucinations in {MLLMs} with Attention Causal Decoding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26147--26159},
}
Towards Autonomous Micromobility through Scalable Urban Simulation: Wayne Wu,

Honglin He,

Chaoyuan Zhang,

Jack He,

Seth Z. Zhao,

Ran Gong,

Quanyi Li,

Bolei Zhou; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Wayne and He, Honglin and Zhang, Chaoyuan and He, Jack and Zhao, Seth Z. and Gong, Ran and Li, Quanyi and Zhou, Bolei},
  title     = {Towards Autonomous Micromobility through Scalable Urban Simulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27553--27563},
}
FisherTune: Fisher-Guided Robust Tuning of Vision Foundation Models for Domain Generalized Segmentation: Dong Zhao,

Jinlong Li,

Shuang Wang,

Mengyao Wu,

Qi Zang,

Nicu Sebe,

Zhun Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Dong and Li, Jinlong and Wang, Shuang and Wu, Mengyao and Zang, Qi and Sebe, Nicu and Zhong, Zhun},
  title     = {{FisherTune}: {Fisher}-Guided Robust Tuning of Vision Foundation Models for Domain Generalized Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15043--15054},
}
Language-Assisted Debiasing and Smoothing for Foundation Model-Based Semi-Supervised Learning: Na Zheng,

Xuemeng Song,

Xue Dong,

Aashish Nikhil Ghosh,

Liqiang Nie,

Roger Zimmermann; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Na and Song, Xuemeng and Dong, Xue and Ghosh, Aashish Nikhil and Nie, Liqiang and Zimmermann, Roger},
  title     = {Language-Assisted Debiasing and Smoothing for Foundation Model-Based Semi-Supervised Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25708--25717},
}
EdgeMovingNet: Edge-preserving Point Cloud Reconstruction via Joint Geometry Features: Xinran Yang,

Donghao Ji,

Yuanqi Li,

Junyuan Xie,

Jie Guo,

Yanwen Guo; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Xinran and Ji, Donghao and Li, Yuanqi and Xie, Junyuan and Guo, Jie and Guo, Yanwen},
  title     = {{EdgeMovingNet}: Edge-preserving Point Cloud Reconstruction via Joint Geometry Features},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22150--22160},
}
AdMiT: Adaptive Multi-Source Tuning in Dynamic Environments: Xiangyu Chang,

Fahim Faisal Niloy,

Sk Miraj Ahmed,

Srikanth V. Krishnamurthy,

Basak Guler,

Ananthram Swami,

Samet Oymak,

Amit Roy-Chowdhury; [pdf] [supp]
[bibtex]
@InProceedings{Chang_2025_CVPR,
  author    = {Chang, Xiangyu and Niloy, Fahim Faisal and Ahmed, Sk Miraj and Krishnamurthy, Srikanth V. and Guler, Basak and Swami, Ananthram and Oymak, Samet and Roy-Chowdhury, Amit},
  title     = {{AdMiT}: Adaptive Multi-Source Tuning in Dynamic Environments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20569--20579},
}
Unbiased Video Scene Graph Generation via Visual and Semantic Dual Debiasing: Yanjun Li,

Zhaoyang Li,

Honghui Chen,

Lizhi Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yanjun and Li, Zhaoyang and Chen, Honghui and Xu, Lizhi},
  title     = {Unbiased Video Scene Graph Generation via Visual and Semantic Dual Debiasing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19047--19056},
}
Channel-wise Noise Scheduled Diffusion for Inverse Rendering in Indoor Scenes: JunYong Choi,

Min-cheol Sagong,

SeokYeong Lee,

Seung-Won Jung,

Ig-Jae Kim,

Junghyun Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2025_CVPR,
  author    = {Choi, JunYong and Sagong, Min-cheol and Lee, SeokYeong and Jung, Seung-Won and Kim, Ig-Jae and Cho, Junghyun},
  title     = {Channel-wise Noise Scheduled Diffusion for Inverse Rendering in Indoor Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5773--5782},
}
Targeted Forgetting of Image Subgroups in CLIP Models: Zeliang Zhang,

Gaowen Liu,

Charles Fleming,

Ramana Rao Kompella,

Chenliang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zeliang and Liu, Gaowen and Fleming, Charles and Kompella, Ramana Rao and Xu, Chenliang},
  title     = {Targeted Forgetting of Image Subgroups in {CLIP} Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9870--9880},
}
Unleashing In-context Learning of Autoregressive Models for Few-shot Image Manipulation: Bolin Lai,

Felix Juefei-Xu,

Miao Liu,

Xiaoliang Dai,

Nikhil Mehta,

Chenguang Zhu,

Zeyi Huang,

James M. Rehg,

Sangmin Lee,

Ning Zhang,

Tong Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Bolin and Juefei-Xu, Felix and Liu, Miao and Dai, Xiaoliang and Mehta, Nikhil and Zhu, Chenguang and Huang, Zeyi and Rehg, James M. and Lee, Sangmin and Zhang, Ning and Xiao, Tong},
  title     = {Unleashing In-context Learning of Autoregressive Models for Few-shot Image Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18346--18357},
}
Harnessing Frozen Unimodal Encoders for Flexible Multimodal Alignment: Mayug Maniparambil,

Raiymbek Akshulakov,

Yasser Abdelaziz Dahou Djilali,

Sanath Narayan,

Ankit Singh,

Noel E. O'Connor; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Maniparambil_2025_CVPR,
  author    = {Maniparambil, Mayug and Akshulakov, Raiymbek and Djilali, Yasser Abdelaziz Dahou and Narayan, Sanath and Singh, Ankit and O'Connor, Noel E.},
  title     = {Harnessing Frozen Unimodal Encoders for Flexible Multimodal Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29847--29857},
}
Feature Information Driven Position Gaussian Distribution Estimation for Tiny Object Detection: Jinghao Bian,

Mingtao Feng,

Weisheng Dong,

Fangfang Wu,

Jianqiao Luo,

Yaonan Wang,

Guangming Shi; [pdf] [supp]
[bibtex]
@InProceedings{Bian_2025_CVPR,
  author    = {Bian, Jinghao and Feng, Mingtao and Dong, Weisheng and Wu, Fangfang and Luo, Jianqiao and Wang, Yaonan and Shi, Guangming},
  title     = {Feature Information Driven Position {Gaussian} Distribution Estimation for Tiny Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30376--30386},
}
Enhancing Diversity for Data-free Quantization: Kai Zhao,

Zhihao Zhuang,

Miao Zhang,

Chenjuan Guo,

Yang Shu,

Bin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Kai and Zhuang, Zhihao and Zhang, Miao and Guo, Chenjuan and Shu, Yang and Yang, Bin},
  title     = {Enhancing Diversity for Data-free Quantization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20969--20978},
}
SeqAfford: Sequential 3D Affordance Reasoning via Multimodal Large Language Model: Chunlin Yu,

Hanqing Wang,

Ye Shi,

Haoyang Luo,

Sibei Yang,

Jingyi Yu,

Jingya Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Chunlin and Wang, Hanqing and Shi, Ye and Luo, Haoyang and Yang, Sibei and Yu, Jingyi and Wang, Jingya},
  title     = {{SeqAfford}: Sequential {3D} Affordance Reasoning via Multimodal Large Language Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1691--1701},
}
DSV-LFS: Unifying LLM-Driven Semantic Cues with Visual Features for Robust Few-Shot Segmentation: Amin Karimi,

Charalambos Poullis; [pdf] [supp]
[bibtex]
@InProceedings{Karimi_2025_CVPR,
  author    = {Karimi, Amin and Poullis, Charalambos},
  title     = {{DSV-LFS}: Unifying {LLM}-Driven Semantic Cues with Visual Features for Robust Few-Shot Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4584--4594},
}
Revisiting Generative Replay for Class Incremental Object Detection: Shizhou Zhang,

Xueqiang Lv,

Yinghui Xing,

Qirui Wu,

Di Xu,

Yanning Zhang; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Shizhou and Lv, Xueqiang and Xing, Yinghui and Wu, Qirui and Xu, Di and Zhang, Yanning},
  title     = {Revisiting Generative Replay for Class Incremental Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20340--20349},
}
SemAlign3D: Semantic Correspondence between RGB-Images through Aligning 3D Object-Class Representations: Krispin Wandel,

Hesheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wandel_2025_CVPR,
  author    = {Wandel, Krispin and Wang, Hesheng},
  title     = {{SemAlign3D}: Semantic Correspondence between {RGB}-Images through Aligning {3D} Object-Class Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1138--1147},
}
Bridging Viewpoint Gaps: Geometric Reasoning Boosts Semantic Correspondence: Qiyang Qian,

Hansheng Chen,

Masayoshi Tomizuka,

Kurt Keutzer,

Qianqian Wang,

Chenfeng Xu; [pdf] [supp]
[bibtex]
@InProceedings{Qian_2025_CVPR,
  author    = {Qian, Qiyang and Chen, Hansheng and Tomizuka, Masayoshi and Keutzer, Kurt and Wang, Qianqian and Xu, Chenfeng},
  title     = {Bridging Viewpoint Gaps: Geometric Reasoning Boosts Semantic Correspondence},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11579--11589},
}
DPU: Dynamic Prototype Updating for Multimodal Out-of-Distribution Detection: Shawn Li,

Huixian Gong,

Hao Dong,

Tiankai Yang,

Zhengzhong Tu,

Yue Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Shawn and Gong, Huixian and Dong, Hao and Yang, Tiankai and Tu, Zhengzhong and Zhao, Yue},
  title     = {{DPU}: Dynamic Prototype Updating for Multimodal Out-of-Distribution Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10193--10202},
}
Towards Unbiased and Robust Spatio-Temporal Scene Graph Generation and Anticipation: Rohith Peddi,

Saurabh Saurabh,

Ayush Abhay Shrivastava,

Parag Singla,

Vibhav Gogate; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peddi_2025_CVPR,
  author    = {Peddi, Rohith and Saurabh, Saurabh and Shrivastava, Ayush Abhay and Singla, Parag and Gogate, Vibhav},
  title     = {Towards Unbiased and Robust Spatio-Temporal Scene Graph Generation and Anticipation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8648--8657},
}
Spatial Transport Optimization by Repositioning Attention Map for Training-Free Text-to-Image Synthesis: Woojung Han,

Yeonkyung Lee,

Chanyoung Kim,

Kwanghyun Park,

Seong Jae Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Woojung and Lee, Yeonkyung and Kim, Chanyoung and Park, Kwanghyun and Hwang, Seong Jae},
  title     = {Spatial Transport Optimization by Repositioning Attention Map for Training-Free Text-to-Image Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18401--18410},
}
From Alexnet to Transformers: Measuring the Non-linearity of Deep Neural Networks with Affine Optimal Transport: Quentin Bouniot,

Ievgen Redko,

Anton Mallasto,

Charlotte Laclau,

Oliver Struckmeier,

Karol Arndt,

Markus Heinonen,

Ville Kyrki,

Samuel Kaski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bouniot_2025_CVPR,
  author    = {Bouniot, Quentin and Redko, Ievgen and Mallasto, Anton and Laclau, Charlotte and Struckmeier, Oliver and Arndt, Karol and Heinonen, Markus and Kyrki, Ville and Kaski, Samuel},
  title     = {From {Alexnet} to Transformers: Measuring the Non-linearity of Deep Neural Networks with Affine Optimal Transport},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25250--25260},
}
Prompt2Perturb (P2P): Text-Guided Diffusion-Based Adversarial Attack on Breast Ultrasound Images: Yasamin Medghalchi,

Moein Heidari,

Clayton Allard,

Leonid Sigal,

Ilker Hacihaliloglu; [pdf] [arXiv]
[bibtex]
@InProceedings{Medghalchi_2025_CVPR, author = {Medghalchi, Yasamin and Heidari, Moein and Allard, Clayton and Sigal, Leonid and Hacihaliloglu, Ilker}, title = {Prompt2Perturb (P2P): Text-Guided Diffusion-Based Adversarial Attack on Breast Ultrasound Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28564-28574} }
Boosting Domain Incremental Learning: Selecting the Optimal Parameters is All You Need: Qiang Wang,

Xiang Song,

Yuhang He,

Jizhou Han,

Chenhao Ding,

Xinyuan Gao,

Yihong Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Qiang and Song, Xiang and He, Yuhang and Han, Jizhou and Ding, Chenhao and Gao, Xinyuan and Gong, Yihong}, title = {Boosting Domain Incremental Learning: Selecting the Optimal Parameters is All You Need}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4839-4849} }
COAP: Memory-Efficient Training with Correlation-Aware Gradient Projection: Jinqi Xiao,

Shen Sang,

Tiancheng Zhi,

Jing Liu,

Qing Yan,

Linjie Luo,

Bo Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_CVPR, author = {Xiao, Jinqi and Sang, Shen and Zhi, Tiancheng and Liu, Jing and Yan, Qing and Luo, Linjie and Yuan, Bo}, title = {COAP: Memory-Efficient Training with Correlation-Aware Gradient Projection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30116-30126} }
Perceptual Inductive Bias Is What You Need Before Contrastive Learning: Junru Zhao,

Tianqin Li,

Dunhan Jiang,

Shenghao Wu,

Alan Ramirez,

Tai Sing Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR, author = {Zhao, Junru and Li, Tianqin and Jiang, Dunhan and Wu, Shenghao and Ramirez, Alan and Lee, Tai Sing}, title = {Perceptual Inductive Bias Is What You Need Before Contrastive Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9621-9630} }
FaceBench: A Multi-View Multi-Level Facial Attribute VQA Dataset for Benchmarking Face Perception MLLMs: Xiaoqin Wang,

Xusen Ma,

Xianxu Hou,

Meidan Ding,

Yudong Li,

Junliang Chen,

Wenting Chen,

Xiaoyang Peng,

Linlin Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Xiaoqin and Ma, Xusen and Hou, Xianxu and Ding, Meidan and Li, Yudong and Chen, Junliang and Chen, Wenting and Peng, Xiaoyang and Shen, Linlin}, title = {FaceBench: A Multi-View Multi-Level Facial Attribute VQA Dataset for Benchmarking Face Perception MLLMs}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9154-9164} }
EffiDec3D: An Optimized Decoder for High-Performance and Efficient 3D Medical Image Segmentation: Md Mostafijur Rahman,

Radu Marculescu; [pdf] [supp]
[bibtex]
@InProceedings{Rahman_2025_CVPR, author = {Rahman, Md Mostafijur and Marculescu, Radu}, title = {EffiDec3D: An Optimized Decoder for High-Performance and Efficient 3D Medical Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10435-10444} }
Exploring Historical Information for RGBE Visual Tracking with Mamba: Chuanyu Sun,

Jiqing Zhang,

Yang Wang,

Huilin Ge,

Qianchen Xia,

Baocai Yin,

Xin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR, author = {Sun, Chuanyu and Zhang, Jiqing and Wang, Yang and Ge, Huilin and Xia, Qianchen and Yin, Baocai and Yang, Xin}, title = {Exploring Historical Information for RGBE Visual Tracking with Mamba}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6500-6509} }
Gyro-based Neural Single Image Deblurring: Heemin Yang,

Jaesung Rim,

Seungyong Lee,

Seung-Hwan Baek,

Sunghyun Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR, author = {Yang, Heemin and Rim, Jaesung and Lee, Seungyong and Baek, Seung-Hwan and Cho, Sunghyun}, title = {Gyro-based Neural Single Image Deblurring}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23111-23120} }
ArtiScene: Language-Driven Artistic 3D Scene Generation Through Image Intermediary: Zeqi Gu,

Yin Cui,

Zhaoshuo Li,

Fangyin Wei,

Yunhao Ge,

Jinwei Gu,

Ming-Yu Liu,

Abe Davis,

Yifan Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2025_CVPR, author = {Gu, Zeqi and Cui, Yin and Li, Zhaoshuo and Wei, Fangyin and Ge, Yunhao and Gu, Jinwei and Liu, Ming-Yu and Davis, Abe and Ding, Yifan}, title = {ArtiScene: Language-Driven Artistic 3D Scene Generation Through Image Intermediary}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2891-2901} }
MobileH2R: Learning Generalizable Human to Mobile Robot Handover Exclusively from Scalable and Diverse Synthetic Data: Zifan Wang,

Ziqing Chen,

Junyu Chen,

Jilong Wang,

Yuxin Yang,

Yunze Liu,

Xueyi Liu,

He Wang,

Li Yi; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Zifan and Chen, Ziqing and Chen, Junyu and Wang, Jilong and Yang, Yuxin and Liu, Yunze and Liu, Xueyi and Wang, He and Yi, Li}, title = {MobileH2R: Learning Generalizable Human to Mobile Robot Handover Exclusively from Scalable and Diverse Synthetic Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17315-17325} }
Improving Sound Source Localization with Joint Slot Attention on Image and Audio: Inho Kim,

Youngkil Song,

Jicheol Park,

Won Hwa Kim,

Suha Kwak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR, author = {Kim, Inho and Song, Youngkil and Park, Jicheol and Kim, Won Hwa and Kwak, Suha}, title = {Improving Sound Source Localization with Joint Slot Attention on Image and Audio}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3121-3130} }
Improved Monocular Depth Prediction Using Distance Transform Over Pre-semantic Contours with Self-supervised Neural Networks: Marwane Hariat,

Antoine Manzanera,

David Filliat; [pdf] [supp]
[bibtex]
@InProceedings{Hariat_2025_CVPR, author = {Hariat, Marwane and Manzanera, Antoine and Filliat, David}, title = {Improved Monocular Depth Prediction Using Distance Transform Over Pre-semantic Contours with Self-supervised Neural Networks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21868-21879} }
Feature-Preserving Mesh Decimation for Normal Integration: Moritz Heep,

Sven Behnke,

Eduard Zell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Heep_2025_CVPR, author = {Heep, Moritz and Behnke, Sven and Zell, Eduard}, title = {Feature-Preserving Mesh Decimation for Normal Integration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5783-5792} }
Is this Generated Person Existed in Real-world? Fine-grained Detecting and Calibrating Abnormal Human-body: Zeqing Wang,

Qingyang Ma,

Wentao Wan,

Haojie Li,

Keze Wang,

Yonghong Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Zeqing and Ma, Qingyang and Wan, Wentao and Li, Haojie and Wang, Keze and Tian, Yonghong}, title = {Is this Generated Person Existed in Real-world? Fine-grained Detecting and Calibrating Abnormal Human-body}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21226-21237} }
PERSE: Personalized 3D Generative Avatars from A Single Portrait: Hyunsoo Cha,

Inhee Lee,

Hanbyul Joo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cha_2025_CVPR, author = {Cha, Hyunsoo and Lee, Inhee and Joo, Hanbyul}, title = {PERSE: Personalized 3D Generative Avatars from A Single Portrait}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15953-15962} }
Automated Generation of Challenging Multiple-Choice Questions for Vision Language Model Evaluation: Yuhui Zhang,

Yuchang Su,

Yiming Liu,

Xiaohan Wang,

James Burgess,

Elaine Sui,

Chenyu Wang,

Josiah Aklilu,

Alejandro Lozano,

Anjiang Wei,

Ludwig Schmidt,

Serena Yeung-Levy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yuhui and Su, Yuchang and Liu, Yiming and Wang, Xiaohan and Burgess, James and Sui, Elaine and Wang, Chenyu and Aklilu, Josiah and Lozano, Alejandro and Wei, Anjiang and Schmidt, Ludwig and Yeung-Levy, Serena}, title = {Automated Generation of Challenging Multiple-Choice Questions for Vision Language Model Evaluation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29580-29590} }
DexHandDiff: Interaction-aware Diffusion Planning for Adaptive Dexterous Manipulation: Zhixuan Liang,

Yao Mu,

Yixiao Wang,

Tianxing Chen,

Wenqi Shao,

Wei Zhan,

Masayoshi Tomizuka,

Ping Luo,

Mingyu Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR, author = {Liang, Zhixuan and Mu, Yao and Wang, Yixiao and Chen, Tianxing and Shao, Wenqi and Zhan, Wei and Tomizuka, Masayoshi and Luo, Ping and Ding, Mingyu}, title = {DexHandDiff: Interaction-aware Diffusion Planning for Adaptive Dexterous Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1745-1755} }
VerbDiff: Text-Only Diffusion Models with Enhanced Interaction Awareness: SeungJu Cha,

Kwanyoung Lee,

Ye-Chan Kim,

Hyunwoo Oh,

Dong-Jin Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cha_2025_CVPR, author = {Cha, SeungJu and Lee, Kwanyoung and Kim, Ye-Chan and Oh, Hyunwoo and Kim, Dong-Jin}, title = {VerbDiff: Text-Only Diffusion Models with Enhanced Interaction Awareness}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8041-8050} }
ROLL: Robust Noisy Pseudo-label Learning for Multi-View Clustering with Noisy Correspondence: Yuan Sun,

Yongxiang Li,

Zhenwen Ren,

Guiduo Duan,

Dezhong Peng,

Peng Hu; [pdf]
[bibtex]
@InProceedings{Sun_2025_CVPR, author = {Sun, Yuan and Li, Yongxiang and Ren, Zhenwen and Duan, Guiduo and Peng, Dezhong and Hu, Peng}, title = {ROLL: Robust Noisy Pseudo-label Learning for Multi-View Clustering with Noisy Correspondence}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30732-30741} }
Towards In-the-wild 3D Plane Reconstruction from a Single Image: Jiachen Liu,

Rui Yu,

Sili Chen,

Sharon X. Huang,

Hengkai Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Jiachen and Yu, Rui and Chen, Sili and Huang, Sharon X. and Guo, Hengkai}, title = {Towards In-the-wild 3D Plane Reconstruction from a Single Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27027-27037} }
Memories of Forgotten Concepts: Matan Rusanovsky,

Shimon Malnick,

Amir Jevnisek,

Ohad Fried,

Shai Avidan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rusanovsky_2025_CVPR, author = {Rusanovsky, Matan and Malnick, Shimon and Jevnisek, Amir and Fried, Ohad and Avidan, Shai}, title = {Memories of Forgotten Concepts}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2966-2975} }
Dynamic Stereotype Theory Induced Micro-expression Recognition with Oriented Deformation: Bohao Zhang,

Xuejiao Wang,

Changbo Wang,

Gaoqi He; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Bohao and Wang, Xuejiao and Wang, Changbo and He, Gaoqi}, title = {Dynamic Stereotype Theory Induced Micro-expression Recognition with Oriented Deformation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10701-10711} }
PQPP: A Joint Benchmark for Text-to-Image Prompt and Query Performance Prediction: Eduard Poesina,

Adriana Valentina Costache,

Adrian-Gabriel Chifu,

Josiane Mothe,

Radu Tudor Ionescu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Poesina_2025_CVPR, author = {Poesina, Eduard and Costache, Adriana Valentina and Chifu, Adrian-Gabriel and Mothe, Josiane and Ionescu, Radu Tudor}, title = {PQPP: A Joint Benchmark for Text-to-Image Prompt and Query Performance Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28651-28661} }
CheXwhatsApp: A Dataset for Exploring Challenges in the Diagnosis of Chest X-rays through Mobile Devices: Mariamma Antony,

Rajiv Porana,

Sahil M Lathiya,

Siva Teja Kakileti,

Chiranjib Bhattacharyya; [pdf] [supp]
[bibtex]
@InProceedings{Antony_2025_CVPR, author = {Antony, Mariamma and Porana, Rajiv and Lathiya, Sahil M and Kakileti, Siva Teja and Bhattacharyya, Chiranjib}, title = {CheXwhatsApp: A Dataset for Exploring Challenges in the Diagnosis of Chest X-rays through Mobile Devices}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25887-25896} }
PSBD: Prediction Shift Uncertainty Unlocks Backdoor Detection: Wei Li,

Pin-Yu Chen,

Sijia Liu,

Ren Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Wei and Chen, Pin-Yu and Liu, Sijia and Wang, Ren}, title = {PSBD: Prediction Shift Uncertainty Unlocks Backdoor Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10255-10264} }
Degradation-Aware Feature Perturbation for All-in-One Image Restoration: Xiangpeng Tian,

Xiangyu Liao,

Xiao Liu,

Meng Li,

Chao Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_CVPR, author = {Tian, Xiangpeng and Liao, Xiangyu and Liu, Xiao and Li, Meng and Ren, Chao}, title = {Degradation-Aware Feature Perturbation for All-in-One Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28165-28175} }
ACL: Activating Capability of Linear Attention for Image Restoration: Yubin Gu,

Yuan Meng,

Jiayi Ji,

Xiaoshuai Sun; [pdf]
[bibtex]
@InProceedings{Gu_2025_CVPR, author = {Gu, Yubin and Meng, Yuan and Ji, Jiayi and Sun, Xiaoshuai}, title = {ACL: Activating Capability of Linear Attention for Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17913-17923} }
GenDeg: Diffusion-based Degradation Synthesis for Generalizable All-In-One Image Restoration: Sudarshan Rajagopalan,

Nithin Gopalakrishnan Nair,

Jay N. Paranjape,

Vishal M. Patel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rajagopalan_2025_CVPR, author = {Rajagopalan, Sudarshan and Nair, Nithin Gopalakrishnan and Paranjape, Jay N. and Patel, Vishal M.}, title = {GenDeg: Diffusion-based Degradation Synthesis for Generalizable All-In-One Image Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28144-28154} }
Phoenix: A Motion-based Self-Reflection Framework for Fine-grained Robotic Action Correction: Wenke Xia,

Ruoxuan Feng,

Dong Wang,

Di Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_CVPR, author = {Xia, Wenke and Feng, Ruoxuan and Wang, Dong and Hu, Di}, title = {Phoenix: A Motion-based Self-Reflection Framework for Fine-grained Robotic Action Correction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6981-6990} }
The Power of Context: How Multimodality Improves Image Super-Resolution: Kangfu Mei,

Hossein Talebi,

Mojtaba Ardakani,

Vishal M. Patel,

Peyman Milanfar,

Mauricio Delbracio; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mei_2025_CVPR, author = {Mei, Kangfu and Talebi, Hossein and Ardakani, Mojtaba and Patel, Vishal M. and Milanfar, Peyman and Delbracio, Mauricio}, title = {The Power of Context: How Multimodality Improves Image Super-Resolution}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23141-23152} }
MARBLE: Material Recomposition and Blending in CLIP-Space: Ta Ying Cheng,

Prafull Sharma,

Mark Boss,

Varun Jampani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR, author = {Cheng, Ta Ying and Sharma, Prafull and Boss, Mark and Jampani, Varun}, title = {MARBLE: Material Recomposition and Blending in CLIP-Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13061-13071} }
Multirate Neural Image Compression with Adaptive Lattice Vector Quantization: Hao Xu,

Xiaolin Wu,

Xi Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR, author = {Xu, Hao and Wu, Xiaolin and Zhang, Xi}, title = {Multirate Neural Image Compression with Adaptive Lattice Vector Quantization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7633-7642} }
EventFly: Event Camera Perception from Ground to the Sky: Lingdong Kong,

Dongyue Lu,

Xiang Xu,

Lai Xing Ng,

Wei Tsang Ooi,

Benoit R. Cottereau; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2025_CVPR, author = {Kong, Lingdong and Lu, Dongyue and Xu, Xiang and Ng, Lai Xing and Ooi, Wei Tsang and Cottereau, Benoit R.}, title = {EventFly: Event Camera Perception from Ground to the Sky}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1472-1484} }
Detect Any Mirrors: Boosting Learning Reliability on Large-Scale Unlabeled Data with an Iterative Data Engine: Zhaohu Xing,

Lihao Liu,

Yijun Yang,

Hongqiu Wang,

Tian Ye,

Sixiang Chen,

Wenxue Li,

Guang Liu,

Lei Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Xing_2025_CVPR, author = {Xing, Zhaohu and Liu, Lihao and Yang, Yijun and Wang, Hongqiu and Ye, Tian and Chen, Sixiang and Li, Wenxue and Liu, Guang and Zhu, Lei}, title = {Detect Any Mirrors: Boosting Learning Reliability on Large-Scale Unlabeled Data with an Iterative Data Engine}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25476-25486} }
CH3Depth: Efficient and Flexible Depth Foundation Model with Flow Matching: Jiaqi Li,

Yiran Wang,

Jinghong Zheng,

Junrui Zhang,

Liao Shen,

Tianqi Liu,

Zhiguo Cao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Jiaqi and Wang, Yiran and Zheng, Jinghong and Zhang, Junrui and Shen, Liao and Liu, Tianqi and Cao, Zhiguo}, title = {CH3Depth: Efficient and Flexible Depth Foundation Model with Flow Matching}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7222-7232} }
Pow3R: Empowering Unconstrained 3D Reconstruction with Camera and Scene Priors: Wonbong Jang,

Philippe Weinzaepfel,

Vincent Leroy,

Lourdes Agapito,

Jerome Revaud; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2025_CVPR, author = {Jang, Wonbong and Weinzaepfel, Philippe and Leroy, Vincent and Agapito, Lourdes and Revaud, Jerome}, title = {Pow3R: Empowering Unconstrained 3D Reconstruction with Camera and Scene Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {1071-1081} }
Efficient Visual State Space Model for Image Deblurring: Lingshun Kong,

Jiangxin Dong,

Jinhui Tang,

Ming-Hsuan Yang,

Jinshan Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2025_CVPR, author = {Kong, Lingshun and Dong, Jiangxin and Tang, Jinhui and Yang, Ming-Hsuan and Pan, Jinshan}, title = {Efficient Visual State Space Model for Image Deblurring}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12710-12719} }
4D LangSplat: 4D Language Gaussian Splatting via Multimodal Large Language Models: Wanhua Li,

Renping Zhou,

Jiawei Zhou,

Yingwei Song,

Johannes Herter,

Minghan Qin,

Gao Huang,

Hanspeter Pfister; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Wanhua and Zhou, Renping and Zhou, Jiawei and Song, Yingwei and Herter, Johannes and Qin, Minghan and Huang, Gao and Pfister, Hanspeter}, title = {4D LangSplat: 4D Language Gaussian Splatting via Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22001-22011} }
MambaVLT: Time-Evolving Multimodal State Space Model for Vision-Language Tracking: Xinqi Liu,

Li Zhou,

Zikun Zhou,

Jianqiu Chen,

Zhenyu He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Xinqi and Zhou, Li and Zhou, Zikun and Chen, Jianqiu and He, Zhenyu}, title = {MambaVLT: Time-Evolving Multimodal State Space Model for Vision-Language Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8731-8741} }
Enhancing 3D Gaze Estimation in the Wild using Weak Supervision with Gaze Following Labels: Pierre Vuillecard,

Jean-Marc Odobez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vuillecard_2025_CVPR, author = {Vuillecard, Pierre and Odobez, Jean-Marc}, title = {Enhancing 3D Gaze Estimation in the Wild using Weak Supervision with Gaze Following Labels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13508-13518} }
Reward Fine-Tuning Two-Step Diffusion Models via Learning Differentiable Latent-Space Surrogate Reward: Zhiwei Jia,

Yuesong Nan,

Huixi Zhao,

Gengdai Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2025_CVPR, author = {Jia, Zhiwei and Nan, Yuesong and Zhao, Huixi and Liu, Gengdai}, title = {Reward Fine-Tuning Two-Step Diffusion Models via Learning Differentiable Latent-Space Surrogate Reward}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12912-12922} }
Detecting Out-of-Distribution Through the Lens of Neural Collapse: Litian Liu,

Yao Qin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Litian and Qin, Yao}, title = {Detecting Out-of-Distribution Through the Lens of Neural Collapse}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {15424-15433} }
Adaptive Parameter Selection for Tuning Vision-Language Models: Yi Zhang,

Yi-Xuan Deng,

Meng-Hao Guo,

Shi-Min Hu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yi and Deng, Yi-Xuan and Guo, Meng-Hao and Hu, Shi-Min}, title = {Adaptive Parameter Selection for Tuning Vision-Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4280-4290} }
MotionMap: Representing Multimodality in Human Pose Forecasting: Reyhaneh Hosseininejad,

Megh Shukla,

Saeed Saadatnejad,

Mathieu Salzmann,

Alexandre Alahi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hosseininejad_2025_CVPR, author = {Hosseininejad, Reyhaneh and Shukla, Megh and Saadatnejad, Saeed and Salzmann, Mathieu and Alahi, Alexandre}, title = {MotionMap: Representing Multimodality in Human Pose Forecasting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22680-22689} }
Learning-enabled Polynomial Lyapunov Function Synthesis via High-Accuracy Counterexample-Guided Framework: Hanrui Zhao,

Niuniu Qi,

Mengxin Ren,

Banglong Liu,

Shuming Shi,

Zhengfeng Yang; [pdf]
[bibtex]
@InProceedings{Zhao_2025_CVPR, author = {Zhao, Hanrui and Qi, Niuniu and Ren, Mengxin and Liu, Banglong and Shi, Shuming and Yang, Zhengfeng}, title = {Learning-enabled Polynomial Lyapunov Function Synthesis via High-Accuracy Counterexample-Guided Framework}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10275-10284} }
Factored-NeuS: Reconstructing Surfaces, Illumination, and Materials of Possibly Glossy Objects: Yue Fan,

Ningjing Fan,

Ivan Skorokhodov,

Oleg Voynov,

Savva Ignatyev,

Evgeny Burnaev,

Peter Wonka,

Yiqun Wang; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2025_CVPR, author = {Fan, Yue and Fan, Ningjing and Skorokhodov, Ivan and Voynov, Oleg and Ignatyev, Savva and Burnaev, Evgeny and Wonka, Peter and Wang, Yiqun}, title = {Factored-NeuS: Reconstructing Surfaces, Illumination, and Materials of Possibly Glossy Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21317-21327} }
GaussianSpa: An "Optimizing-Sparsifying" Simplification Framework for Compact and High-Quality 3D Gaussian Splatting: Yangming Zhang,

Wenqi Jia,

Wei Niu,

Miao Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Yangming and Jia, Wenqi and Niu, Wei and Yin, Miao}, title = {GaussianSpa: An ``Optimizing-Sparsifying'' Simplification Framework for Compact and High-Quality 3D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26673-26682} }
Sparse2DGS: Geometry-Prioritized Gaussian Splatting for Surface Reconstruction from Sparse Views: Jiang Wu,

Rui Li,

Yu Zhu,

Rong Guo,

Jinqiu Sun,

Yanning Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR, author = {Wu, Jiang and Li, Rui and Zhu, Yu and Guo, Rong and Sun, Jinqiu and Zhang, Yanning}, title = {Sparse2DGS: Geometry-Prioritized Gaussian Splatting for Surface Reconstruction from Sparse Views}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11307-11316} }
VinTAGe: Joint Video and Text Conditioning for Holistic Audio Generation: Saksham Singh Kushwaha,

Yapeng Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kushwaha_2025_CVPR, author = {Kushwaha, Saksham Singh and Tian, Yapeng}, title = {VinTAGe: Joint Video and Text Conditioning for Holistic Audio Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13529-13539} }
Efficient Decoupled Feature 3D Gaussian Splatting via Hierarchical Compression: Zhenqi Dai,

Ting Liu,

Yanning Zhang; [pdf]
[bibtex]
@InProceedings{Dai_2025_CVPR, author = {Dai, Zhenqi and Liu, Ting and Zhang, Yanning}, title = {Efficient Decoupled Feature 3D Gaussian Splatting via Hierarchical Compression}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11156-11166} }
CountLLM: Towards Generalizable Repetitive Action Counting via Large Language Model: Ziyu Yao,

Xuxin Cheng,

Zhiqi Huang,

Lei Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Yao_2025_CVPR, author = {Yao, Ziyu and Cheng, Xuxin and Huang, Zhiqi and Li, Lei}, title = {CountLLM: Towards Generalizable Repetitive Action Counting via Large Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {19143-19153} }
Navigating the Unseen: Zero-shot Scene Graph Generation via Capsule-Based Equivariant Features: Wenhuan Huang,

Yi JI,

Guiqian Zhu,

Li Ying,

Chunping Liu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR, author = {Huang, Wenhuan and JI, Yi and Zhu, Guiqian and Ying, Li and Liu, Chunping}, title = {Navigating the Unseen: Zero-shot Scene Graph Generation via Capsule-Based Equivariant Features}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29448-29457} }
VL-RewardBench: A Challenging Benchmark for Vision-Language Generative Reward Models: Lei Li,

Yuancheng Wei,

Zhihui Xie,

Xuqing Yang,

Yifan Song,

Peiyi Wang,

Chenxin An,

Tianyu Liu,

Sujian Li,

Bill Yuchen Lin,

Lingpeng Kong,

Qi Liu; [pdf]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Lei and Wei, Yuancheng and Xie, Zhihui and Yang, Xuqing and Song, Yifan and Wang, Peiyi and An, Chenxin and Liu, Tianyu and Li, Sujian and Lin, Bill Yuchen and Kong, Lingpeng and Liu, Qi}, title = {VL-RewardBench: A Challenging Benchmark for Vision-Language Generative Reward Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24657-24668} }
ASHiTA: Automatic Scene-grounded HIerarchical Task Analysis: Yun Chang,

Leonor Fermoselle,

Duy Ta,

Bernadette Bucher,

Luca Carlone,

Jiuguang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chang_2025_CVPR, author = {Chang, Yun and Fermoselle, Leonor and Ta, Duy and Bucher, Bernadette and Carlone, Luca and Wang, Jiuguang}, title = {ASHiTA: Automatic Scene-grounded HIerarchical Task Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29458-29468} }
Patient-Level Anatomy Meets Scanning-Level Physics: Personalized Federated Low-Dose CT Denoising Empowered by Large Language Model: Ziyuan Yang,

Yingyu Chen,

Zhiwen Wang,

Hongming Shan,

Yang Chen,

Yi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR, author = {Yang, Ziyuan and Chen, Yingyu and Wang, Zhiwen and Shan, Hongming and Chen, Yang and Zhang, Yi}, title = {Patient-Level Anatomy Meets Scanning-Level Physics: Personalized Federated Low-Dose CT Denoising Empowered by Large Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5154-5163} }
Exploiting Deblurring Networks for Radiance Fields: Haeyun Choi,

Heemin Yang,

Janghyeok Han,

Sunghyun Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2025_CVPR, author = {Choi, Haeyun and Yang, Heemin and Han, Janghyeok and Cho, Sunghyun}, title = {Exploiting Deblurring Networks for Radiance Fields}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6012-6021} }
Rethinking Lanes and Points in Complex Scenarios for Monocular 3D Lane Detection: Yifan Chang,

Junjie Huang,

Xiaofeng Wang,

Yun Ye,

Zhujin Liang,

Yi Shan,

Dalong Du,

Xingang Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Chang_2025_CVPR, author = {Chang, Yifan and Huang, Junjie and Wang, Xiaofeng and Ye, Yun and Liang, Zhujin and Shan, Yi and Du, Dalong and Wang, Xingang}, title = {Rethinking Lanes and Points in Complex Scenarios for Monocular 3D Lane Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {6802-6811} }
SPAR3D: Stable Point-Aware Reconstruction of 3D Objects from Single Images: Zixuan Huang,

Mark Boss,

Aaryaman Vasishta,

James M. Rehg,

Varun Jampani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zixuan and Boss, Mark and Vasishta, Aaryaman and Rehg, James M. and Jampani, Varun},
  title     = {{SPAR3D}: Stable Point-Aware Reconstruction of {3D} Objects from Single Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16860--16870},
}
Discovering Fine-Grained Visual-Concept Relations by Disentangled Optimal Transport Concept Bottleneck Models: Yan Xie,

Zequn Zeng,

Hao Zhang,

Yucheng Ding,

Yi Wang,

Zhengjue Wang,

Bo Chen,

Hongwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Yan and Zeng, Zequn and Zhang, Hao and Ding, Yucheng and Wang, Yi and Wang, Zhengjue and Chen, Bo and Liu, Hongwei},
  title     = {Discovering Fine-Grained Visual-Concept Relations by Disentangled Optimal Transport Concept Bottleneck Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30199--30209},
}
Focus-N-Fix: Region-Aware Fine-Tuning for Text-to-Image Generation: Xiaoying Xing,

Avinab Saha,

Junfeng He,

Susan Hao,

Paul Vicol,

Moonkyung Ryu,

Gang Li,

Sahil Singla,

Sarah Young,

Yinxiao Li,

Feng Yang,

Deepak Ramachandran; [pdf] [supp]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Xiaoying and Saha, Avinab and He, Junfeng and Hao, Susan and Vicol, Paul and Ryu, Moonkyung and Li, Gang and Singla, Sahil and Young, Sarah and Li, Yinxiao and Yang, Feng and Ramachandran, Deepak},
  title     = {{Focus-N-Fix}: Region-Aware Fine-Tuning for Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18486--18496},
}
RoomTour3D: Geometry-Aware Video-Instruction Tuning for Embodied Navigation: Mingfei Han,

Liang Ma,

Kamila Zhumakhanova,

Ekaterina Radionova,

Jingyi Zhang,

Xiaojun Chang,

Xiaodan Liang,

Ivan Laptev; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Mingfei and Ma, Liang and Zhumakhanova, Kamila and Radionova, Ekaterina and Zhang, Jingyi and Chang, Xiaojun and Liang, Xiaodan and Laptev, Ivan},
  title     = {{RoomTour3D}: Geometry-Aware Video-Instruction Tuning for Embodied Navigation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27586--27596},
}
PSA-SSL: Pose and Size-aware Self-Supervised Learning on LiDAR Point Clouds: Barza Nisar,

Steven L. Waslander; [pdf] [supp]
[bibtex]
@InProceedings{Nisar_2025_CVPR,
  author    = {Nisar, Barza and Waslander, Steven L.},
  title     = {{PSA-SSL}: Pose and Size-aware Self-Supervised Learning on {LiDAR} Point Clouds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6670--6679},
}
Bringing CLIP to the Clinic: Dynamic Soft Labels and Negation-Aware Learning for Medical Analysis: Hanbin Ko,

Chang-Min Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ko_2025_CVPR,
  author    = {Ko, Hanbin and Park, Chang-Min},
  title     = {Bringing {CLIP} to the Clinic: Dynamic Soft Labels and Negation-Aware Learning for Medical Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25897--25906},
}
SnapGen: Taming High-Resolution Text-to-Image Models for Mobile Devices with Efficient Architectures and Training: Jierun Chen,

Dongting Hu,

Xijie Huang,

Huseyin Coskun,

Arpit Sahni,

Aarush Gupta,

Anujraaj Goyal,

Dishani Lahiri,

Rajesh Singh,

Yerlan Idelbayev,

Junli Cao,

Yanyu Li,

Kwang-Ting Cheng,

S.-H. Gary Chan,

Mingming Gong,

Sergey Tulyakov,

Anil Kag,

Yanwu Xu,

Jian Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jierun and Hu, Dongting and Huang, Xijie and Coskun, Huseyin and Sahni, Arpit and Gupta, Aarush and Goyal, Anujraaj and Lahiri, Dishani and Singh, Rajesh and Idelbayev, Yerlan and Cao, Junli and Li, Yanyu and Cheng, Kwang-Ting and Chan, S.-H. Gary and Gong, Mingming and Tulyakov, Sergey and Kag, Anil and Xu, Yanwu and Ren, Jian},
  title     = {{SnapGen}: Taming High-Resolution Text-to-Image Models for Mobile Devices with Efficient Architectures and Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7997--8008},
}
Label Shift Meets Online Learning: Ensuring Consistent Adaptation with Universal Dynamic Regret: Yucong Dai,

Shilin Gu,

Ruidong Fan,

Chao Xu,

Chenping Hou; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2025_CVPR,
  author    = {Dai, Yucong and Gu, Shilin and Fan, Ruidong and Xu, Chao and Hou, Chenping},
  title     = {Label Shift Meets Online Learning: Ensuring Consistent Adaptation with Universal Dynamic Regret},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15392--15401},
}
A Physics-Informed Blur Learning Framework for Imaging Systems: Liqun Chen,

Yuxuan Li,

Jun Dai,

Jinwei Gu,

Tianfan Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Liqun and Li, Yuxuan and Dai, Jun and Gu, Jinwei and Xue, Tianfan},
  title     = {A Physics-Informed Blur Learning Framework for Imaging Systems},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10913--10922},
}
A Semantic Knowledge Complementarity based Decoupling Framework for Semi-supervised Class-imbalanced Medical Image Segmentation: Zheng Zhang,

Guanchun Yin,

Bo Zhang,

Wu Liu,

Xiuzhuang Zhou,

Wendong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zheng and Yin, Guanchun and Zhang, Bo and Liu, Wu and Zhou, Xiuzhuang and Wang, Wendong},
  title     = {A Semantic Knowledge Complementarity based Decoupling Framework for Semi-supervised Class-imbalanced Medical Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25940--25949},
}
Community Forensics: Using Thousands of Generators to Train Fake Image Detectors: Jeongsoo Park,

Andrew Owens; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Jeongsoo and Owens, Andrew},
  title     = {Community Forensics: Using Thousands of Generators to Train Fake Image Detectors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8245--8257},
}
ModeSeq: Taming Sparse Multimodal Motion Prediction with Sequential Mode Modeling: Zikang Zhou,

Hengjian Zhou,

Haibo Hu,

Zihao Wen,

Jianping Wang,

Yung-Hui Li,

Yu-Kai Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Zikang and Zhou, Hengjian and Hu, Haibo and Wen, Zihao and Wang, Jianping and Li, Yung-Hui and Huang, Yu-Kai},
  title     = {{ModeSeq}: Taming Sparse Multimodal Motion Prediction with Sequential Mode Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1612--1621},
}
Quaffure: Real-Time Quasi-Static Neural Hair Simulation: Tuur Stuyck,

Gene Wei-Chin Lin,

Egor Larionov,

Hsiao-yu Chen,

Aljaz Bozic,

Nikolaos Sarafianos,

Doug Roble; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Stuyck_2025_CVPR,
  author    = {Stuyck, Tuur and Lin, Gene Wei-Chin and Larionov, Egor and Chen, Hsiao-yu and Bozic, Aljaz and Sarafianos, Nikolaos and Roble, Doug},
  title     = {Quaffure: Real-Time Quasi-Static Neural Hair Simulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {239--249},
}
Towards Practical Real-Time Neural Video Compression: Zhaoyang Jia,

Bin Li,

Jiahao Li,

Wenxuan Xie,

Linfeng Qi,

Houqiang Li,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2025_CVPR,
  author    = {Jia, Zhaoyang and Li, Bin and Li, Jiahao and Xie, Wenxuan and Qi, Linfeng and Li, Houqiang and Lu, Yan},
  title     = {Towards Practical Real-Time Neural Video Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12543--12552},
}
DepthSplat: Connecting Gaussian Splatting and Depth: Haofei Xu,

Songyou Peng,

Fangjinhua Wang,

Hermann Blum,

Daniel Barath,

Andreas Geiger,

Marc Pollefeys; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Haofei and Peng, Songyou and Wang, Fangjinhua and Blum, Hermann and Barath, Daniel and Geiger, Andreas and Pollefeys, Marc},
  title     = {{DepthSplat}: Connecting {Gaussian} Splatting and Depth},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16453--16463},
}
LumiNet: Latent Intrinsics Meets Diffusion Models for Indoor Scene Relighting: Xiaoyan Xing,

Konrad Groh,

Sezer Karaoglu,

Theo Gevers,

Anand Bhattad; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Xiaoyan and Groh, Konrad and Karaoglu, Sezer and Gevers, Theo and Bhattad, Anand},
  title     = {{LumiNet}: Latent Intrinsics Meets Diffusion Models for Indoor Scene Relighting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {442--452},
}
FedBiP: Heterogeneous One-Shot Federated Learning with Personalized Latent Diffusion Models: Haokun Chen,

Hang Li,

Yao Zhang,

Jinhe Bi,

Gengyuan Zhang,

Yueqi Zhang,

Philip Torr,

Jindong Gu,

Denis Krompass,

Volker Tresp; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Haokun and Li, Hang and Zhang, Yao and Bi, Jinhe and Zhang, Gengyuan and Zhang, Yueqi and Torr, Philip and Gu, Jindong and Krompass, Denis and Tresp, Volker},
  title     = {{FedBiP}: Heterogeneous One-Shot Federated Learning with Personalized Latent Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30440--30450},
}
DiC: Rethinking Conv3x3 Designs in Diffusion Models: Yuchuan Tian,

Jing Han,

Chengcheng Wang,

Yuchen Liang,

Chao Xu,

Hanting Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Yuchuan and Han, Jing and Wang, Chengcheng and Liang, Yuchen and Xu, Chao and Chen, Hanting},
  title     = {{DiC}: Rethinking {Conv3x3} Designs in Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2469--2478},
}
Dynamic Camera Poses and Where to Find Them: Chris Rockwell,

Joseph Tung,

Tsung-Yi Lin,

Ming-Yu Liu,

David F. Fouhey,

Chen-Hsuan Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rockwell_2025_CVPR,
  author    = {Rockwell, Chris and Tung, Joseph and Lin, Tsung-Yi and Liu, Ming-Yu and Fouhey, David F. and Lin, Chen-Hsuan},
  title     = {Dynamic Camera Poses and Where to Find Them},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12444--12455},
}
MoSca: Dynamic Gaussian Fusion from Casual Videos via 4D Motion Scaffolds: Jiahui Lei,

Yijia Weng,

Adam W. Harley,

Leonidas Guibas,

Kostas Daniilidis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2025_CVPR,
  author    = {Lei, Jiahui and Weng, Yijia and Harley, Adam W. and Guibas, Leonidas and Daniilidis, Kostas},
  title     = {{MoSca}: Dynamic {Gaussian} Fusion from Casual Videos via {4D} Motion Scaffolds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6165--6177},
}
GCE-Pose: Global Context Enhancement for Category-level Object Pose Estimation: Weihang Li,

Hongli Xu,

Junwen Huang,

Hyunjun Jung,

Peter KT Yu,

Nassir Navab,

Benjamin Busam; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Weihang and Xu, Hongli and Huang, Junwen and Jung, Hyunjun and Yu, Peter KT and Navab, Nassir and Busam, Benjamin},
  title     = {{GCE-Pose}: Global Context Enhancement for Category-level Object Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27154--27165},
}
OmniGen: Unified Image Generation: Shitao Xiao,

Yueze Wang,

Junjie Zhou,

Huaying Yuan,

Xingrun Xing,

Ruiran Yan,

Chaofan Li,

Shuting Wang,

Tiejun Huang,

Zheng Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Shitao and Wang, Yueze and Zhou, Junjie and Yuan, Huaying and Xing, Xingrun and Yan, Ruiran and Li, Chaofan and Wang, Shuting and Huang, Tiejun and Liu, Zheng},
  title     = {{OmniGen}: Unified Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13294--13304},
}
QuCOOP: A Versatile Framework for Solving Composite and Binary-Parametrised Problems on Quantum Annealers: Natacha Kuete Meli,

Vladislav Golyanik,

Marcel Seelbach Benkner,

Michael Moeller; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meli_2025_CVPR,
  author    = {Meli, Natacha Kuete and Golyanik, Vladislav and Benkner, Marcel Seelbach and Moeller, Michael},
  title     = {{QuCOOP}: A Versatile Framework for Solving Composite and Binary-Parametrised Problems on Quantum Annealers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11395--11405},
}
Mesh Mamba: A Unified State Space Model for Saliency Prediction in Non-Textured and Textured Meshes: Kaiwei Zhang,

Dandan Zhu,

Xiongkuo Min,

Guangtao Zhai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Kaiwei and Zhu, Dandan and Min, Xiongkuo and Zhai, Guangtao},
  title     = {Mesh {Mamba}: A Unified State Space Model for Saliency Prediction in Non-Textured and Textured Meshes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16219--16228},
}
Not Just Text: Uncovering Vision Modality Typographic Threats in Image Generation Models: Hao Cheng,

Erjia Xiao,

Jiayan Yang,

Jiahang Cao,

Qiang Zhang,

Jize Zhang,

Kaidi Xu,

Jindong Gu,

Renjing Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Hao and Xiao, Erjia and Yang, Jiayan and Cao, Jiahang and Zhang, Qiang and Zhang, Jize and Xu, Kaidi and Gu, Jindong and Xu, Renjing},
  title     = {Not Just Text: Uncovering Vision Modality Typographic Threats in Image Generation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2997--3007},
}
SILMM: Self-Improving Large Multimodal Models for Compositional Text-to-Image Generation: Leigang Qu,

Haochuan Li,

Wenjie Wang,

Xiang Liu,

Juncheng Li,

Liqiang Nie,

Tat-Seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_CVPR,
  author    = {Qu, Leigang and Li, Haochuan and Wang, Wenjie and Liu, Xiang and Li, Juncheng and Nie, Liqiang and Chua, Tat-Seng},
  title     = {{SILMM}: Self-Improving Large Multimodal Models for Compositional Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18497--18508},
}
Calibrated Multi-Preference Optimization for Aligning Diffusion Models: Kyungmin Lee,

Xiahong Li,

Qifei Wang,

Junfeng He,

Junjie Ke,

Ming-Hsuan Yang,

Irfan Essa,

Jinwoo Shin,

Feng Yang,

Yinxiao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Kyungmin and Li, Xiahong and Wang, Qifei and He, Junfeng and Ke, Junjie and Yang, Ming-Hsuan and Essa, Irfan and Shin, Jinwoo and Yang, Feng and Li, Yinxiao},
  title     = {Calibrated Multi-Preference Optimization for Aligning Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18465--18475},
}
Learning from Neighbors: Category Extrapolation for Long-Tail Learning: Shizhen Zhao,

Xin Wen,

Jiahui Liu,

Chuofan Ma,

Chunfeng Yuan,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Shizhen and Wen, Xin and Liu, Jiahui and Ma, Chuofan and Yuan, Chunfeng and Qi, Xiaojuan},
  title     = {Learning from Neighbors: Category Extrapolation for Long-Tail Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30483--30492},
}
Material Anything: Generating Materials for Any 3D Object via Diffusion: Xin Huang,

Tengfei Wang,

Ziwei Liu,

Qing Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Xin and Wang, Tengfei and Liu, Ziwei and Wang, Qing},
  title     = {{Material Anything}: Generating Materials for Any {3D} Object via Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26556--26565},
}
TokenHSI: Unified Synthesis of Physical Human-Scene Interactions through Task Tokenization: Liang Pan,

Zeshi Yang,

Zhiyang Dou,

Wenjia Wang,

Buzhen Huang,

Bo Dai,

Taku Komura,

Jingbo Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2025_CVPR,
  author    = {Pan, Liang and Yang, Zeshi and Dou, Zhiyang and Wang, Wenjia and Huang, Buzhen and Dai, Bo and Komura, Taku and Wang, Jingbo},
  title     = {{TokenHSI}: Unified Synthesis of Physical Human-Scene Interactions through Task Tokenization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5379--5391},
}
ShapeShifter: 3D Variations Using Multiscale and Sparse Point-Voxel Diffusion: Nissim Maruani,

Wang Yifan,

Matthew Fisher,

Pierre Alliez,

Mathieu Desbrun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Maruani_2025_CVPR,
  author    = {Maruani, Nissim and Yifan, Wang and Fisher, Matthew and Alliez, Pierre and Desbrun, Mathieu},
  title     = {{ShapeShifter}: {3D} Variations Using Multiscale and Sparse Point-Voxel Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {605--617},
}
ImagineFSL: Self-Supervised Pretraining Matters on Imagined Base Set for VLM-based Few-shot Learning: Haoyuan Yang,

Xiaoou Li,

Jiaming Lv,

Xianjun Cheng,

Qilong Wang,

Peihua Li; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Haoyuan and Li, Xiaoou and Lv, Jiaming and Cheng, Xianjun and Wang, Qilong and Li, Peihua},
  title     = {{ImagineFSL}: Self-Supervised Pretraining Matters on Imagined Base Set for {VLM}-based Few-shot Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30020--30031},
}
Continuous Locomotive Crowd Behavior Generation: Inhwan Bae,

Junoh Lee,

Hae-Gon Jeon; [pdf] [arXiv]
[bibtex]
@InProceedings{Bae_2025_CVPR,
  author    = {Bae, Inhwan and Lee, Junoh and Jeon, Hae-Gon},
  title     = {Continuous Locomotive Crowd Behavior Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22416--22431},
}
Project-Probe-Aggregate: Efficient Fine-Tuning for Group Robustness: Beier Zhu,

Jiequan Cui,

Hanwang Zhang,

Chi Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Beier and Cui, Jiequan and Zhang, Hanwang and Zhang, Chi},
  title     = {{Project-Probe-Aggregate}: Efficient Fine-Tuning for Group Robustness},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25487--25496},
}
Implicit Bias Injection Attacks against Text-to-Image Diffusion Models: Huayang Huang,

Xiangye Jin,

Jiaxu Miao,

Yu Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Huayang and Jin, Xiangye and Miao, Jiaxu and Wu, Yu},
  title     = {Implicit Bias Injection Attacks against Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28779--28789},
}
ROICtrl: Boosting Instance Control for Visual Generation: Yuchao Gu,

Yipin Zhou,

Yunfan Ye,

Yixin Nie,

Licheng Yu,

Pingchuan Ma,

Kevin Qinghong Lin,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2025_CVPR,
  author    = {Gu, Yuchao and Zhou, Yipin and Ye, Yunfan and Nie, Yixin and Yu, Licheng and Ma, Pingchuan and Lin, Kevin Qinghong and Shou, Mike Zheng},
  title     = {{ROICtrl}: Boosting Instance Control for Visual Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23658--23667},
}
FRESA: Feedforward Reconstruction of Personalized Skinned Avatars from Few Images: Rong Wang,

Fabian Prada,

Ziyan Wang,

Zhongshi Jiang,

Chengxiang Yin,

Junxuan Li,

Shunsuke Saito,

Igor Santesteban,

Javier Romero,

Rohan Joshi,

Hongdong Li,

Jason Saragih,

Yaser Sheikh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Rong and Prada, Fabian and Wang, Ziyan and Jiang, Zhongshi and Yin, Chengxiang and Li, Junxuan and Saito, Shunsuke and Santesteban, Igor and Romero, Javier and Joshi, Rohan and Li, Hongdong and Saragih, Jason and Sheikh, Yaser},
  title     = {{FRESA}: Feedforward Reconstruction of Personalized Skinned Avatars from Few Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {281--291},
}
ReasonGrounder: LVLM-Guided Hierarchical Feature Splatting for Open-Vocabulary 3D Visual Grounding and Reasoning: Zhenyang Liu,

Yikai Wang,

Sixiao Zheng,

Tongying Pan,

Longfei Liang,

Yanwei Fu,

Xiangyang Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhenyang and Wang, Yikai and Zheng, Sixiao and Pan, Tongying and Liang, Longfei and Fu, Yanwei and Xue, Xiangyang},
  title     = {{ReasonGrounder}: {LVLM}-Guided Hierarchical Feature Splatting for Open-Vocabulary {3D} Visual Grounding and Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3718--3727},
}
Cropper: Vision-Language Model for Image Cropping through In-Context Learning: Seung Hyun Lee,

Jijun Jiang,

Yiran Xu,

Zhuofang Li,

Junjie Ke,

Yinxiao Li,

Junfeng He,

Steven Hickson,

Katie Datsenko,

Sangpil Kim,

Ming-Hsuan Yang,

Irfan Essa,

Feng Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Seung Hyun and Jiang, Jijun and Xu, Yiran and Li, Zhuofang and Ke, Junjie and Li, Yinxiao and He, Junfeng and Hickson, Steven and Datsenko, Katie and Kim, Sangpil and Yang, Ming-Hsuan and Essa, Irfan and Yang, Feng},
  title     = {Cropper: Vision-Language Model for Image Cropping through In-Context Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30010--30019},
}
Advancing Adversarial Robustness in GNeRFs: The IL2-NeRF Attack: Nicole Meng,

Caleb Manicke,

Ronak Sahu,

Caiwen Ding,

Yingjie Lao; [pdf] [supp]
[bibtex]
@InProceedings{Meng_2025_CVPR,
  author    = {Meng, Nicole and Manicke, Caleb and Sahu, Ronak and Ding, Caiwen and Lao, Yingjie},
  title     = {Advancing Adversarial Robustness in {GNeRFs}: The {IL2-NeRF} Attack},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16388--16397},
}
WonderWorld: Interactive 3D Scene Generation from a Single Image: Hong-Xing Yu,

Haoyi Duan,

Charles Herrmann,

William T. Freeman,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Hong-Xing and Duan, Haoyi and Herrmann, Charles and Freeman, William T. and Wu, Jiajun},
  title     = {{WonderWorld}: Interactive {3D} Scene Generation from a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5916--5926},
}
A Lightweight UDF Learning Framework for 3D Reconstruction Based on Local Shape Functions: Jiangbei Hu,

Yanggeng Li,

Fei Hou,

Junhui Hou,

Zhebin Zhang,

Shengfa Wang,

Na Lei,

Ying He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Jiangbei and Li, Yanggeng and Hou, Fei and Hou, Junhui and Zhang, Zhebin and Wang, Shengfa and Lei, Na and He, Ying},
  title     = {A Lightweight {UDF} Learning Framework for {3D} Reconstruction Based on Local Shape Functions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1297--1307},
}
DiffCAM: Data-Driven Saliency Maps by Capturing Feature Differences: Xingjian Li,

Qiming Zhao,

Neelesh Bisht,

Mostofa Rafid Uddin,

Jin Yu Kim,

Bryan Zhang,

Min Xu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xingjian and Zhao, Qiming and Bisht, Neelesh and Uddin, Mostofa Rafid and Kim, Jin Yu and Zhang, Bryan and Xu, Min},
  title     = {{DiffCAM}: Data-Driven Saliency Maps by Capturing Feature Differences},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10327--10337},
}
PolarNeXt: Rethink Instance Segmentation with Polar Representation: Jiacheng Sun,

Xinghong Zhou,

Yiqiang Wu,

Bin Zhu,

Jiaxuan Lu,

Yu Qin,

Xiaomao Li; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Jiacheng and Zhou, Xinghong and Wu, Yiqiang and Zhu, Bin and Lu, Jiaxuan and Qin, Yu and Li, Xiaomao},
  title     = {{PolarNeXt}: Rethink Instance Segmentation with Polar Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19315--19324},
}
ScaMo: Exploring the Scaling Law in Autoregressive Motion Generation Model: Shunlin Lu,

Jingbo Wang,

Zeyu Lu,

Ling-Hao Chen,

Wenxun Dai,

Junting Dong,

Zhiyang Dou,

Bo Dai,

Ruimao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Shunlin and Wang, Jingbo and Lu, Zeyu and Chen, Ling-Hao and Dai, Wenxun and Dong, Junting and Dou, Zhiyang and Dai, Bo and Zhang, Ruimao},
  title     = {{ScaMo}: Exploring the Scaling Law in Autoregressive Motion Generation Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27872--27882},
}
From Sparse Signal to Smooth Motion: Real-Time Motion Generation with Rolling Prediction Models: German Barquero,

Nadine Bertsch,

Manojkumar Marramreddy,

Carlos Chacón,

Filippo Arcadu,

Ferran Rigual,

Nicky Sijia He,

Cristina Palmero,

Sergio Escalera,

Yuting Ye,

Robin Kips; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Barquero_2025_CVPR,
  author    = {Barquero, German and Bertsch, Nadine and Marramreddy, Manojkumar and Chac{\'o}n, Carlos and Arcadu, Filippo and Rigual, Ferran and He, Nicky Sijia and Palmero, Cristina and Escalera, Sergio and Ye, Yuting and Kips, Robin},
  title     = {From Sparse Signal to Smooth Motion: Real-Time Motion Generation with Rolling Prediction Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1850--1860},
}
Imagine and Seek: Improving Composed Image Retrieval with an Imagined Proxy: You Li,

Fan Ma,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, You and Ma, Fan and Yang, Yi},
  title     = {Imagine and Seek: Improving Composed Image Retrieval with an Imagined Proxy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3984--3993},
}
EMOVA: Empowering Language Models to See, Hear and Speak with Vivid Emotions: Kai Chen,

Yunhao Gou,

Runhui Huang,

Zhili Liu,

Daxin Tan,

Jing Xu,

Chunwei Wang,

Yi Zhu,

Yihan Zeng,

Kuo Yang,

Dingdong Wang,

Kun Xiang,

Haoyuan Li,

Haoli Bai,

Jianhua Han,

Xiaohui Li,

Weike Jin,

Nian Xie,

Yu Zhang,

James T. Kwok,

Hengshuang Zhao,

Xiaodan Liang,

Dit-Yan Yeung,

Xiao Chen,

Zhenguo Li,

Wei Zhang,

Qun Liu,

Lanqing Hong,

Lu Hou,

Hang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Kai and Gou, Yunhao and Huang, Runhui and Liu, Zhili and Tan, Daxin and Xu, Jing and Wang, Chunwei and Zhu, Yi and Zeng, Yihan and Yang, Kuo and Wang, Dingdong and Xiang, Kun and Li, Haoyuan and Bai, Haoli and Han, Jianhua and Li, Xiaohui and Jin, Weike and Xie, Nian and Zhang, Yu and Kwok, James T. and Zhao, Hengshuang and Liang, Xiaodan and Yeung, Dit-Yan and Chen, Xiao and Li, Zhenguo and Zhang, Wei and Liu, Qun and Hong, Lanqing and Hou, Lu and Xu, Hang},
  title     = {{EMOVA}: Empowering Language Models to See, Hear and Speak with Vivid Emotions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5455--5466},
}
SAM-REF: Introducing Image-Prompt Synergy during Interaction for Detail Enhancement in the Segment Anything Model: Chongkai Yu,

Ting Liu,

Anqi Li,

Xiaochao Qu,

Chengjing Wu,

Luoqi Liu,

Xiaolin Hu; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Chongkai and Liu, Ting and Li, Anqi and Qu, Xiaochao and Wu, Chengjing and Liu, Luoqi and Hu, Xiaolin},
  title     = {{SAM-REF}: Introducing Image-Prompt Synergy during Interaction for Detail Enhancement in the {Segment Anything Model}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19356--19365},
}
DarkIR: Robust Low-Light Image Restoration: Daniel Feijoo,

Juan C. Benito,

Alvaro Garcia,

Marcos V. Conde; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feijoo_2025_CVPR,
  author    = {Feijoo, Daniel and Benito, Juan C. and Garcia, Alvaro and Conde, Marcos V.},
  title     = {{DarkIR}: Robust Low-Light Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10879--10889},
}
R2C: Mapping Room to Chessboard to Unlock LLM As Low-Level Action Planner: Ziyi Bai,

Hanxuan Li,

Bin Fu,

Chuyan Xiong,

Ruiping Wang,

Xilin Chen; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2025_CVPR,
  author    = {Bai, Ziyi and Li, Hanxuan and Fu, Bin and Xiong, Chuyan and Wang, Ruiping and Chen, Xilin},
  title     = {{R2C}: Mapping Room to Chessboard to Unlock {LLM} As Low-Level Action Planner},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19456--19466},
}
ICE: Intrinsic Concept Extraction from a Single Image via Diffusion Models: Fernando Julio Cendra,

Kai Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cendra_2025_CVPR,
  author    = {Cendra, Fernando Julio and Han, Kai},
  title     = {{ICE}: Intrinsic Concept Extraction from a Single Image via Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23734--23743},
}
ASIGN: An Anatomy-aware Spatial Imputation Graphic Network for 3D Spatial Transcriptomics: Junchao Zhu,

Ruining Deng,

Tianyuan Yao,

Juming Xiong,

Chongyu Qu,

Junlin Guo,

Siqi Lu,

Mengmeng Yin,

Yu Wang,

Shilin Zhao,

Haichun Yang,

Yuankai Huo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Junchao and Deng, Ruining and Yao, Tianyuan and Xiong, Juming and Qu, Chongyu and Guo, Junlin and Lu, Siqi and Yin, Mengmeng and Wang, Yu and Zhao, Shilin and Yang, Haichun and Huo, Yuankai},
  title     = {{ASIGN}: An Anatomy-aware Spatial Imputation Graphic Network for {3D} Spatial Transcriptomics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30829--30838},
}
Reversing Flow for Image Restoration: Haina Qin,

Wenyang Luo,

Libin Wang,

Dandan Zheng,

Jingdong Chen,

Ming Yang,

Bing Li,

Weiming Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2025_CVPR,
  author    = {Qin, Haina and Luo, Wenyang and Wang, Libin and Zheng, Dandan and Chen, Jingdong and Yang, Ming and Li, Bing and Hu, Weiming},
  title     = {Reversing Flow for Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7545--7558},
}
Shadow Generation Using Diffusion Model with Geometry Prior: Haonan Zhao,

Qingyang Liu,

Xinhao Tao,

Li Niu,

Guangtao Zhai; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Haonan and Liu, Qingyang and Tao, Xinhao and Niu, Li and Zhai, Guangtao},
  title     = {Shadow Generation Using Diffusion Model with Geometry Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7603--7612},
}
Rethinking Epistemic and Aleatoric Uncertainty for Active Open-Set Annotation: An Energy-Based Approach: Chen-Chen Zong,

Sheng-Jun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zong_2025_CVPR,
  author    = {Zong, Chen-Chen and Huang, Sheng-Jun},
  title     = {Rethinking Epistemic and Aleatoric Uncertainty for Active Open-Set Annotation: An Energy-Based Approach},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10153--10162},
}
Any3DIS: Class-Agnostic 3D Instance Segmentation by 2D Mask Tracking: Phuc Nguyen,

Minh Luu,

Anh Tran,

Cuong Pham,

Khoi Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Phuc and Luu, Minh and Tran, Anh and Pham, Cuong and Nguyen, Khoi},
  title     = {{Any3DIS}: Class-Agnostic {3D} Instance Segmentation by {2D} Mask Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3636--3645},
}
FDS: Frequency-Aware Denoising Score for Text-Guided Latent Diffusion Image Editing: Yufan Ren,

Zicong Jiang,

Tong Zhang,

Søren Forchhammer,

Sabine Süsstrunk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Yufan and Jiang, Zicong and Zhang, Tong and Forchhammer, S{\o}ren and S{\"u}sstrunk, Sabine},
  title     = {{FDS}: Frequency-Aware Denoising Score for Text-Guided Latent Diffusion Image Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2651--2660},
}
MMAR: Towards Lossless Multi-Modal Auto-Regressive Probabilistic Modeling: Jian Yang,

Dacheng Yin,

Yizhou Zhou,

Fengyun Rao,

Wei Zhai,

Yang Cao,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jian and Yin, Dacheng and Zhou, Yizhou and Rao, Fengyun and Zhai, Wei and Cao, Yang and Zha, Zheng-Jun},
  title     = {{MMAR}: Towards Lossless Multi-Modal Auto-Regressive Probabilistic Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7974--7985},
}
ROS-SAM: High-Quality Interactive Segmentation for Remote Sensing Moving Object: Zhe Shan,

Yang Liu,

Lei Zhou,

Cheng Yan,

Heng Wang,

Xia Xie; [pdf] [supp]
[bibtex]
@InProceedings{Shan_2025_CVPR,
  author    = {Shan, Zhe and Liu, Yang and Zhou, Lei and Yan, Cheng and Wang, Heng and Xie, Xia},
  title     = {{ROS-SAM}: High-Quality Interactive Segmentation for Remote Sensing Moving Object},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3625--3635},
}
MultiMorph: On-demand Atlas Construction: S. Mazdak Abulnaga,

Andrew Hoopes,

Neel Dey,

Malte Hoffmann,

Bruce Fischl,

John Guttag,

Adrian Dalca; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Abulnaga_2025_CVPR,
  author    = {Abulnaga, S. Mazdak and Hoopes, Andrew and Dey, Neel and Hoffmann, Malte and Fischl, Bruce and Guttag, John and Dalca, Adrian},
  title     = {{MultiMorph}: On-demand Atlas Construction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30906--30917},
}
MI-DETR: An Object Detection Model with Multi-time Inquiries Mechanism: Zhixiong Nan,

Xianghong Li,

Jifeng Dai,

Tao Xiang; [pdf] [supp]
[bibtex]
@InProceedings{Nan_2025_CVPR,
  author    = {Nan, Zhixiong and Li, Xianghong and Dai, Jifeng and Xiang, Tao},
  title     = {{MI-DETR}: An Object Detection Model with Multi-time Inquiries Mechanism},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4703--4712},
}
From Prototypes to General Distributions: An Efficient Curriculum for Masked Image Modeling: Jinhong Lin,

Cheng-En Wu,

Huanran Li,

Jifan Zhang,

Yu Hen Hu,

Pedro Morgado; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Jinhong and Wu, Cheng-En and Li, Huanran and Zhang, Jifan and Hu, Yu Hen and Morgado, Pedro},
  title     = {From Prototypes to General Distributions: An Efficient Curriculum for Masked Image Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20028--20038},
}
Synthetic Visual Genome: Jae Sung Park,

Zixian Ma,

Linjie Li,

Chenhao Zheng,

Cheng-Yu Hsieh,

Ximing Lu,

Khyathi Chandu,

Quan Kong,

Norimasa Kobori,

Ali Farhadi,

Yejin Choi,

Ranjay Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Jae Sung and Ma, Zixian and Li, Linjie and Zheng, Chenhao and Hsieh, Cheng-Yu and Lu, Ximing and Chandu, Khyathi and Kong, Quan and Kobori, Norimasa and Farhadi, Ali and Choi, Yejin and Krishna, Ranjay},
  title     = {Synthetic {Visual} {Genome}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9073--9086},
}
Difference Inversion: Interpolate and Isolate the Difference with Token Consistency for Image Analogy Generation: Hyunsoo Kim,

Donghyun Kim,

Suhyun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Hyunsoo and Kim, Donghyun and Kim, Suhyun},
  title     = {Difference Inversion: Interpolate and Isolate the Difference with Token Consistency for Image Analogy Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18250--18259},
}
Octopus: Alleviating Hallucination via Dynamic Contrastive Decoding: Wei Suo,

Lijun Zhang,

Mengyang Sun,

Lin Yuanbo Wu,

Peng Wang,

Yanning Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Suo_2025_CVPR,
  author    = {Suo, Wei and Zhang, Lijun and Sun, Mengyang and Wu, Lin Yuanbo and Wang, Peng and Zhang, Yanning},
  title     = {Octopus: Alleviating Hallucination via Dynamic Contrastive Decoding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29904--29914},
}
MTADiffusion: Mask Text Alignment Diffusion Model for Object Inpainting: Jun Huang,

Ting Liu,

Yihang Wu,

Xiaochao Qu,

Luoqi Liu,

Xiaolin Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Jun and Liu, Ting and Wu, Yihang and Qu, Xiaochao and Liu, Luoqi and Hu, Xiaolin},
  title     = {{MTADiffusion}: Mask Text Alignment Diffusion Model for Object Inpainting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18325--18334},
}
Stop Learning it all to Mitigate Visual Hallucination, Focus on the Hallucination Target.: Dokyoon Yoon,

Youngsook Song,

Woomyoung Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoon_2025_CVPR,
  author    = {Yoon, Dokyoon and Song, Youngsook and Park, Woomyoung},
  title     = {Stop Learning it all to Mitigate Visual Hallucination, Focus on the Hallucination Target.},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4200--4208},
}
Seeing the Abstract: Translating the Abstract Language for Vision Language Models: Davide Talon,

Federico Girella,

Ziyue Liu,

Marco Cristani,

Yiming Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Talon_2025_CVPR,
  author    = {Talon, Davide and Girella, Federico and Liu, Ziyue and Cristani, Marco and Wang, Yiming},
  title     = {Seeing the Abstract: Translating the Abstract Language for Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9253--9262},
}
Spiking Transformer: Introducing Accurate Addition-Only Spiking Self-Attention for Transformer: Yufei Guo,

Xiaode Liu,

Yuanpei Chen,

Weihang Peng,

Yuhan Zhang,

Zhe Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Yufei and Liu, Xiaode and Chen, Yuanpei and Peng, Weihang and Zhang, Yuhan and Ma, Zhe},
  title     = {Spiking Transformer: Introducing Accurate Addition-Only Spiking Self-Attention for Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24398--24408},
}
Grounding 3D Object Affordance with Language Instructions, Visual Observations and Interactions: He Zhu,

Quyu Kong,

Kechun Xu,

Xunlong Xia,

Bing Deng,

Jieping Ye,

Rong Xiong,

Yue Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, He and Kong, Quyu and Xu, Kechun and Xia, Xunlong and Deng, Bing and Ye, Jieping and Xiong, Rong and Wang, Yue},
  title     = {Grounding {3D} Object Affordance with Language Instructions, Visual Observations and Interactions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17337--17346},
}
MOVIS: Enhancing Multi-Object Novel View Synthesis for Indoor Scenes: Ruijie Lu,

Yixin Chen,

Junfeng Ni,

Baoxiong Jia,

Yu Liu,

Diwen Wan,

Gang Zeng,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Ruijie and Chen, Yixin and Ni, Junfeng and Jia, Baoxiong and Liu, Yu and Wan, Diwen and Zeng, Gang and Huang, Siyuan},
  title     = {{MOVIS}: Enhancing Multi-Object Novel View Synthesis for Indoor Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26767--26778},
}
One-Step Event-Driven High-Speed Autofocus: Yuhan Bao,

Shaohua Gao,

Wenyong Li,

Kaiwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bao_2025_CVPR,
  author    = {Bao, Yuhan and Gao, Shaohua and Li, Wenyong and Wang, Kaiwei},
  title     = {One-Step Event-Driven High-Speed Autofocus},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6222--6230},
}
Symbolic Representation for Any-to-Any Generative Tasks: Jiaqi Chen,

Xiaoye Zhu,

Yue Wang,

Tianyang Liu,

Xinhui Chen,

Ying Chen,

Chak Tou Leong,

Yifei Ke,

Joseph Liu,

Yiwen Yuan,

Julian McAuley,

Li-jia Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jiaqi and Zhu, Xiaoye and Wang, Yue and Liu, Tianyang and Chen, Xinhui and Chen, Ying and Leong, Chak Tou and Ke, Yifei and Liu, Joseph and Yuan, Yiwen and McAuley, Julian and Li, Li-jia},
  title     = {Symbolic Representation for Any-to-Any Generative Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27816--27826},
}
Protecting Your Video Content: Disrupting Automated Video-based LLM Annotations: Haitong Liu,

Kuofeng Gao,

Yang Bai,

Jinmin Li,

Jinxiao Shan,

Tao Dai,

Shu-Tao Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Haitong and Gao, Kuofeng and Bai, Yang and Li, Jinmin and Shan, Jinxiao and Dai, Tao and Xia, Shu-Tao},
  title     = {Protecting Your Video Content: Disrupting Automated Video-based {LLM} Annotations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24056--24065},
}
PanDA: Towards Panoramic Depth Anything with Unlabeled Panoramas and Mobius Spatial Augmentation: Zidong Cao,

Jinjing Zhu,

Weiming Zhang,

Hao Ai,

Haotian Bai,

Hengshuang Zhao,

Lin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Zidong and Zhu, Jinjing and Zhang, Weiming and Ai, Hao and Bai, Haotian and Zhao, Hengshuang and Wang, Lin},
  title     = {{PanDA}: Towards Panoramic {Depth} {Anything} with Unlabeled Panoramas and {Mobius} Spatial Augmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {982--992},
}
Towards High-fidelity 3D Talking Avatar with Personalized Dynamic Texture: Xuanchen Li,

Jianyu Wang,

Yuhao Cheng,

Yikun Zeng,

Xingyu Ren,

Wenhan Zhu,

Weiming Zhao,

Yichao Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xuanchen and Wang, Jianyu and Cheng, Yuhao and Zeng, Yikun and Ren, Xingyu and Zhu, Wenhan and Zhao, Weiming and Yan, Yichao},
  title     = {Towards High-fidelity {3D} Talking Avatar with Personalized Dynamic Texture},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {204--214},
}
Scene Splatter: Momentum 3D Scene Generation from Single Image with Video Diffusion Model: Shengjun Zhang,

Jinzhao Li,

Xin Fei,

Hao Liu,

Yueqi Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Shengjun and Li, Jinzhao and Fei, Xin and Liu, Hao and Duan, Yueqi},
  title     = {Scene Splatter: Momentum {3D} Scene Generation from Single Image with Video Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6089--6098},
}
JiSAM: Alleviate Labeling Burden and Corner Case Problems in Autonomous Driving via Minimal Real-World Data: Runjian Chen,

Wenqi Shao,

Bo Zhang,

Shaoshuai Shi,

Li Jiang,

Ping Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Runjian and Shao, Wenqi and Zhang, Bo and Shi, Shaoshuai and Jiang, Li and Luo, Ping},
  title     = {{JiSAM}: Alleviate Labeling Burden and Corner Case Problems in Autonomous Driving via Minimal Real-World Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6792--6801},
}
OSMamba: Omnidirectional Spectral Mamba with Dual-Domain Prior Generator for Exposure Correction: Gehui Li,

Bin Chen,

Chen Zhao,

Lei Zhang,

Jian Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Gehui and Chen, Bin and Zhao, Chen and Zhang, Lei and Zhang, Jian},
  title     = {{OSMamba}: Omnidirectional Spectral {Mamba} with Dual-Domain Prior Generator for Exposure Correction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7480--7490},
}
Image is All You Need to Empower Large-scale Diffusion Models for In-Domain Generation: Pu Cao,

Feng Zhou,

Lu Yang,

Tianrui Huang,

Qing Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Pu and Zhou, Feng and Yang, Lu and Huang, Tianrui and Song, Qing},
  title     = {Image is All You Need to Empower Large-scale Diffusion Models for In-Domain Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18358--18368},
}
MedUnifier: Unifying Vision-and-Language Pre-training on Medical Data with Vision Generation Task using Discrete Visual Representations: Ziyang Zhang,

Yang Yu,

Yucheng Chen,

Xulei Yang,

Si Yong Yeo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Ziyang and Yu, Yang and Chen, Yucheng and Yang, Xulei and Yeo, Si Yong},
  title     = {{MedUnifier}: Unifying Vision-and-Language Pre-training on Medical Data with Vision Generation Task using Discrete Visual Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29744--29755},
}
RandAR: Decoder-only Autoregressive Visual Generation in Random Orders: Ziqi Pang,

Tianyuan Zhang,

Fujun Luan,

Yunze Man,

Hao Tan,

Kai Zhang,

William T. Freeman,

Yu-Xiong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pang_2025_CVPR,
  author    = {Pang, Ziqi and Zhang, Tianyuan and Luan, Fujun and Man, Yunze and Tan, Hao and Zhang, Kai and Freeman, William T. and Wang, Yu-Xiong},
  title     = {{RandAR}: Decoder-only Autoregressive Visual Generation in Random Orders},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {45--55},
}
Evolving High-Quality Rendering and Reconstruction in a Unified Framework with Contribution-Adaptive Regularization: You Shen,

Zhipeng Zhang,

Xinyang Li,

Yansong Qu,

Yu Lin,

Shengchuan Zhang,

Liujuan Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, You and Zhang, Zhipeng and Li, Xinyang and Qu, Yansong and Lin, Yu and Zhang, Shengchuan and Cao, Liujuan},
  title     = {Evolving High-Quality Rendering and Reconstruction in a Unified Framework with Contribution-Adaptive Regularization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16346--16355},
}
ArticulatedGS: Self-supervised Digital Twin Modeling of Articulated Objects using 3D Gaussian Splatting: Junfu Guo,

Yu Xin,

Gaoyi Liu,

Kai Xu,

Ligang Liu,

Ruizhen Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Junfu and Xin, Yu and Liu, Gaoyi and Xu, Kai and Liu, Ligang and Hu, Ruizhen},
  title     = {{ArticulatedGS}: Self-supervised Digital Twin Modeling of Articulated Objects using {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27144--27153},
}
NoiseCtrl: A Sampling-Algorithm-Agnostic Conditional Generation Method for Diffusion Models: Longquan Dai,

He Wang,

Jinhui Tang; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2025_CVPR,
  author    = {Dai, Longquan and Wang, He and Tang, Jinhui},
  title     = {{NoiseCtrl}: A Sampling-Algorithm-Agnostic Conditional Generation Method for Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18093--18102},
}
Leveraging 3D Geometric Priors in 2D Rotation Symmetry Detection: Ahyun Seo,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seo_2025_CVPR,
  author    = {Seo, Ahyun and Cho, Minsu},
  title     = {Leveraging {3D} Geometric Priors in {2D} Rotation Symmetry Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22109--22118},
}
KMD: Koopman Multi-modality Decomposition for Generalized Brain Tumor Segmentation under Incomplete Modalities: Tianyi Liu,

Haochuan Jiang,

Kaizhu Huang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Tianyi and Jiang, Haochuan and Huang, Kaizhu},
  title     = {{KMD}: {Koopman} Multi-modality Decomposition for Generalized Brain Tumor Segmentation under Incomplete Modalities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15663--15671},
}
Vid2Sim: Realistic and Interactive Simulation from Video for Urban Navigation: Ziyang Xie,

Zhizheng Liu,

Zhenghao Peng,

Wayne Wu,

Bolei Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Ziyang and Liu, Zhizheng and Peng, Zhenghao and Wu, Wayne and Zhou, Bolei},
  title     = {{Vid2Sim}: Realistic and Interactive Simulation from Video for Urban Navigation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1581--1591},
}
DORNet: A Degradation Oriented and Regularized Network for Blind Depth Super-Resolution: Zhengxue Wang,

Zhiqiang Yan,

Jinshan Pan,

Guangwei Gao,

Kai Zhang,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhengxue and Yan, Zhiqiang and Pan, Jinshan and Gao, Guangwei and Zhang, Kai and Yang, Jian},
  title     = {{DORNet}: A Degradation Oriented and Regularized Network for Blind Depth Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15813--15822},
}
Fractal Calibration for Long-tailed Object Detection: Konstantinos Panagiotis Alexandridis,

Ismail Elezi,

Jiankang Deng,

Anh Nguyen,

Shan Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alexandridis_2025_CVPR,
  author    = {Alexandridis, Konstantinos Panagiotis and Elezi, Ismail and Deng, Jiankang and Nguyen, Anh and Luo, Shan},
  title     = {Fractal Calibration for Long-tailed Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15139--15150},
}
M3GYM: A Large-Scale Multimodal Multi-view Multi-person Pose Dataset for Fitness Activity Understanding in Real-world Settings: Qingzheng Xu,

Ru Cao,

Xin Shen,

Heming Du,

Sen Wang,

Xin Yu; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Qingzheng and Cao, Ru and Shen, Xin and Du, Heming and Wang, Sen and Yu, Xin},
  title     = {{M3GYM}: A Large-Scale Multimodal Multi-view Multi-person Pose Dataset for Fitness Activity Understanding in Real-world Settings},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12289--12300},
}
Noise Calibration and Spatial-Frequency Interactive Network for STEM Image Enhancement: Hesong Li,

Ziqi Wu,

Ruiwen Shao,

Tao Zhang,

Ying Fu; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Hesong and Wu, Ziqi and Shao, Ruiwen and Zhang, Tao and Fu, Ying},
  title     = {Noise Calibration and Spatial-Frequency Interactive Network for {STEM} Image Enhancement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21287--21296},
}
Type-R: Automatically Retouching Typos for Text-to-Image Generation: Wataru Shimoda,

Naoto Inoue,

Daichi Haraguchi,

Hayato Mitani,

Seiichi Uchida,

Kota Yamaguchi; [pdf] [supp]
[bibtex]
@InProceedings{Shimoda_2025_CVPR,
  author    = {Shimoda, Wataru and Inoue, Naoto and Haraguchi, Daichi and Mitani, Hayato and Uchida, Seiichi and Yamaguchi, Kota},
  title     = {{Type-R}: Automatically Retouching Typos for Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2745--2754},
}
Video-3D LLM: Learning Position-Aware Video Representation for 3D Scene Understanding: Duo Zheng,

Shijia Huang,

Liwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Duo and Huang, Shijia and Wang, Liwei},
  title     = {{Video-3D} {LLM}: Learning Position-Aware Video Representation for {3D} Scene Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8995--9006},
}
FedSPA: Generalizable Federated Graph Learning under Homophily Heterogeneity: Zihan Tan,

Guancheng Wan,

Wenke Huang,

He Li,

Guibin Zhang,

Carl Yang,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2025_CVPR,
  author    = {Tan, Zihan and Wan, Guancheng and Huang, Wenke and Li, He and Zhang, Guibin and Yang, Carl and Ye, Mang},
  title     = {{FedSPA}: Generalizable Federated Graph Learning under Homophily Heterogeneity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15464--15475},
}
Homogeneous Dynamics Space for Heterogeneous Humans: Xinpeng Liu,

Junxuan Liang,

Chenshuo Zhang,

Zixuan Cai,

Cewu Lu,

Yong-Lu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xinpeng and Liang, Junxuan and Zhang, Chenshuo and Cai, Zixuan and Lu, Cewu and Li, Yong-Lu},
  title     = {Homogeneous Dynamics Space for Heterogeneous Humans},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27782--27793},
}
TailedCore: Few-Shot Sampling for Unsupervised Long-Tail Noisy Anomaly Detection: Yoon Gyo Jung,

Jaewoo Park,

Jaeho Yoon,

Kuan-Chuan Peng,

Wonchul Kim,

Andrew Beng Jin Teoh,

Octavia Camps; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2025_CVPR,
  author    = {Jung, Yoon Gyo and Park, Jaewoo and Yoon, Jaeho and Peng, Kuan-Chuan and Kim, Wonchul and Teoh, Andrew Beng Jin and Camps, Octavia},
  title     = {{TailedCore}: Few-Shot Sampling for Unsupervised Long-Tail Noisy Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25539--25548},
}
GazeGene: Large-scale Synthetic Gaze Dataset with 3D Eyeball Annotations: Yiwei Bao,

Zhiming Wang,

Feng Lu; [pdf] [supp]
[bibtex]
@InProceedings{Bao_2025_CVPR,
  author    = {Bao, Yiwei and Wang, Zhiming and Lu, Feng},
  title     = {{GazeGene}: Large-scale Synthetic Gaze Dataset with {3D} Eyeball Annotations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18749--18759},
}
VideoHandles: Editing 3D Object Compositions in Videos Using Video Generative Priors: Juil Koo,

Paul Guerrero,

Chun-Hao P. Huang,

Duygu Ceylan,

Minhyuk Sung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koo_2025_CVPR,
  author    = {Koo, Juil and Guerrero, Paul and Huang, Chun-Hao P. and Ceylan, Duygu and Sung, Minhyuk},
  title     = {{VideoHandles}: Editing {3D} Object Compositions in Videos Using Video Generative Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17692--17701},
}
Satellite Observations Guided Diffusion Model for Accurate Meteorological States at Arbitrary Resolution: Siwei Tu,

Ben Fei,

Weidong Yang,

Fenghua Ling,

Hao Chen,

Zili Liu,

Kun Chen,

Hang Fan,

Wanli Ouyang,

Lei Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tu_2025_CVPR,
  author    = {Tu, Siwei and Fei, Ben and Yang, Weidong and Ling, Fenghua and Chen, Hao and Liu, Zili and Chen, Kun and Fan, Hang and Ouyang, Wanli and Bai, Lei},
  title     = {Satellite Observations Guided Diffusion Model for Accurate Meteorological States at Arbitrary Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28071--28080},
}
Reconstructing People, Places, and Cameras: Lea Müller,

Hongsuk Choi,

Anthony Zhang,

Brent Yi,

Jitendra Malik,

Angjoo Kanazawa; [pdf] [supp]
[bibtex]
@InProceedings{Muller_2025_CVPR,
  author    = {M{\"u}ller, Lea and Choi, Hongsuk and Zhang, Anthony and Yi, Brent and Malik, Jitendra and Kanazawa, Angjoo},
  title     = {Reconstructing People, Places, and Cameras},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21948--21958},
}
InPO: Inversion Preference Optimization with Reparametrized DDIM for Efficient Diffusion Model Alignment: Yunhong Lu,

Qichao Wang,

Hengyuan Cao,

Xierui Wang,

Xiaoyin Xu,

Min Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Yunhong and Wang, Qichao and Cao, Hengyuan and Wang, Xierui and Xu, Xiaoyin and Zhang, Min},
  title     = {{InPO}: Inversion Preference Optimization with Reparametrized {DDIM} for Efficient Diffusion Model Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28629--28639},
}
GaussTR: Foundation Model-Aligned Gaussian Transformer for Self-Supervised 3D Spatial Understanding: Haoyi Jiang,

Liu Liu,

Tianheng Cheng,

Xinjie Wang,

Tianwei Lin,

Zhizhong Su,

Wenyu Liu,

Xinggang Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Haoyi and Liu, Liu and Cheng, Tianheng and Wang, Xinjie and Lin, Tianwei and Su, Zhizhong and Liu, Wenyu and Wang, Xinggang},
  title     = {{GaussTR}: Foundation Model-Aligned {Gaussian} Transformer for Self-Supervised {3D} Spatial Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11960--11970},
}
Single Domain Generalization for Few-Shot Counting via Universal Representation Matching: Xianing Chen,

Si Huo,

Borui Jiang,

Hailin Hu,

Xinghao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Xianing and Huo, Si and Jiang, Borui and Hu, Hailin and Chen, Xinghao},
  title     = {Single Domain Generalization for Few-Shot Counting via Universal Representation Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4639--4649},
}
Discovering Hidden Visual Concepts Beyond Linguistic Input in Infant Learning: Xueyi Ke,

Satoshi Tsutsui,

Yayun Zhang,

Bihan Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2025_CVPR,
  author    = {Ke, Xueyi and Tsutsui, Satoshi and Zhang, Yayun and Wen, Bihan},
  title     = {Discovering Hidden Visual Concepts Beyond Linguistic Input in Infant Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4343--4352},
}
Do We Always Need the Simplicity Bias? Looking for Optimal Inductive Biases in the Wild: Damien Teney,

Liangze Jiang,

Florin Gogianu,

Ehsan Abbasnejad; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Teney_2025_CVPR,
  author    = {Teney, Damien and Jiang, Liangze and Gogianu, Florin and Abbasnejad, Ehsan},
  title     = {Do We Always Need the Simplicity Bias? {Looking} for Optimal Inductive Biases in the Wild},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {79--90},
}
A General Adaptive Dual-level Weighting Mechanism for Remote Sensing Pansharpening: Jie Huang,

Haorui Chen,

Jiaxuan Ren,

Siran Peng,

Liangjian Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Jie and Chen, Haorui and Ren, Jiaxuan and Peng, Siran and Deng, Liangjian},
  title     = {A General Adaptive Dual-level Weighting Mechanism for Remote Sensing Pansharpening},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7447--7456},
}
RASP: Revisiting 3D Anamorphic Art for Shadow-Guided Packing of Irregular Objects: Soumyaratna Debnath,

Ashish Tiwari,

Kaustubh Sadekar,

Shanmuganathan Raman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Debnath_2025_CVPR, author = {Debnath, Soumyaratna and Tiwari, Ashish and Sadekar, Kaustubh and Raman, Shanmuganathan}, title = {RASP: Revisiting 3D Anamorphic Art for Shadow-Guided Packing of Irregular Objects}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5849-5858} }
Identifying and Mitigating Spurious Correlation in Multi-Task Learning: Junyi Chai,

Shenyu Lu,

Xiaoqian Wang; [pdf] [supp]
[bibtex]
@InProceedings{Chai_2025_CVPR, author = {Chai, Junyi and Lu, Shenyu and Wang, Xiaoqian}, title = {Identifying and Mitigating Spurious Correlation in Multi-Task Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25698-25707} }
Continuous, Subject-Specific Attribute Control in T2I Models by Identifying Semantic Directions: Stefan Andreas Baumann,

Felix Krause,

Michael Neumayr,

Nick Stracke,

Melvin Sevi,

Vincent Tao Hu,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Baumann_2025_CVPR, author = {Baumann, Stefan Andreas and Krause, Felix and Neumayr, Michael and Stracke, Nick and Sevi, Melvin and Hu, Vincent Tao and Ommer, Bj\"orn}, title = {Continuous, Subject-Specific Attribute Control in T2I Models by Identifying Semantic Directions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13231-13241} }
Diffusion Bridge: Leveraging Diffusion Model to Reduce the Modality Gap Between Text and Vision for Zero-Shot Image Captioning: Jeong Ryong Lee,

Yejee Shin,

Geonhui Son,

Dosik Hwang; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_CVPR, author = {Lee, Jeong Ryong and Shin, Yejee and Son, Geonhui and Hwang, Dosik}, title = {Diffusion Bridge: Leveraging Diffusion Model to Reduce the Modality Gap Between Text and Vision for Zero-Shot Image Captioning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {4050-4059} }
MODfinity: Unsupervised Domain Adaptation with Multimodal Information Flow Intertwining: Shanglin Liu,

Jianming Lv,

Jingdan Kang,

Huaidong Zhang,

Zequan Liang,

Shengfeng He; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Shanglin and Lv, Jianming and Kang, Jingdan and Zhang, Huaidong and Liang, Zequan and He, Shengfeng}, title = {MODfinity: Unsupervised Domain Adaptation with Multimodal Information Flow Intertwining}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {5092-5101} }
Towards Universal Soccer Video Understanding: Jiayuan Rao,

Haoning Wu,

Hao Jiang,

Ya Zhang,

Yanfeng Wang,

Weidi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rao_2025_CVPR, author = {Rao, Jiayuan and Wu, Haoning and Jiang, Hao and Zhang, Ya and Wang, Yanfeng and Xie, Weidi}, title = {Towards Universal Soccer Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8384-8394} }
SimLingo: Vision-Only Closed-Loop Autonomous Driving with Language-Action Alignment: Katrin Renz,

Long Chen,

Elahe Arani,

Oleg Sinavski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Renz_2025_CVPR, author = {Renz, Katrin and Chen, Long and Arani, Elahe and Sinavski, Oleg}, title = {SimLingo: Vision-Only Closed-Loop Autonomous Driving with Language-Action Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11993-12003} }
Improved Video VAE for Latent Video Diffusion Model: Pingyu Wu,

Kai Zhu,

Yu Liu,

Liming Zhao,

Wei Zhai,

Yang Cao,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR, author = {Wu, Pingyu and Zhu, Kai and Liu, Yu and Zhao, Liming and Zhai, Wei and Cao, Yang and Zha, Zheng-Jun}, title = {Improved Video VAE for Latent Video Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {18124-18133} }
Immune: Improving Safety Against Jailbreaks in Multi-modal LLMs via Inference-Time Alignment: Soumya Suvra Ghosal,

Souradip Chakraborty,

Vaibhav Singh,

Tianrui Guan,

Mengdi Wang,

Ahmad Beirami,

Furong Huang,

Alvaro Velasquez,

Dinesh Manocha,

Amrit Singh Bedi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghosal_2025_CVPR, author = {Ghosal, Soumya Suvra and Chakraborty, Souradip and Singh, Vaibhav and Guan, Tianrui and Wang, Mengdi and Beirami, Ahmad and Huang, Furong and Velasquez, Alvaro and Manocha, Dinesh and Bedi, Amrit Singh}, title = {Immune: Improving Safety Against Jailbreaks in Multi-modal LLMs via Inference-Time Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25038-25049} }
Efficient Video Super-Resolution for Real-time Rendering with Decoupled G-buffer Guidance: Mingjun Zheng,

Long Sun,

Jiangxin Dong,

Jinshan Pan; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_CVPR, author = {Zheng, Mingjun and Sun, Long and Dong, Jiangxin and Pan, Jinshan}, title = {Efficient Video Super-Resolution for Real-time Rendering with Decoupled G-buffer Guidance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11328-11337} }
CustomKD: Customizing Large Vision Foundation for Edge Model Improvement via Knowledge Distillation: Jungsoo Lee,

Debasmit Das,

Munawar Hayat,

Sungha Choi,

Kyuwoong Hwang,

Fatih Porikli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR, author = {Lee, Jungsoo and Das, Debasmit and Hayat, Munawar and Choi, Sungha and Hwang, Kyuwoong and Porikli, Fatih}, title = {CustomKD: Customizing Large Vision Foundation for Edge Model Improvement via Knowledge Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25176-25186} }
Enhancing Privacy-Utility Trade-offs to Mitigate Memorization in Diffusion Models: Chen Chen,

Daochang Liu,

Mubarak Shah,

Chang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR, author = {Chen, Chen and Liu, Daochang and Shah, Mubarak and Xu, Chang}, title = {Enhancing Privacy-Utility Trade-offs to Mitigate Memorization in Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {8182-8191} }
Learned Image Compression with Dictionary-based Entropy Model: Jingbo Lu,

Leheng Zhang,

Xingyu Zhou,

Mu Li,

Wen Li,

Shuhang Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR, author = {Lu, Jingbo and Zhang, Leheng and Zhou, Xingyu and Li, Mu and Li, Wen and Gu, Shuhang}, title = {Learned Image Compression with Dictionary-based Entropy Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12850-12859} }
PMNI: Pose-free Multi-view Normal Integration for Reflective and Textureless Surface Reconstruction: Mingzhi Pei,

Xu Cao,

Xiangyi Wang,

Heng Guo,

Zhanyu Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pei_2025_CVPR, author = {Pei, Mingzhi and Cao, Xu and Wang, Xiangyi and Guo, Heng and Ma, Zhanyu}, title = {PMNI: Pose-free Multi-view Normal Integration for Reflective and Textureless Surface Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26834-26843} }
NVComposer: Boosting Generative Novel View Synthesis with Multiple Sparse and Unposed Images: Lingen Li,

Zhaoyang Zhang,

Yaowei Li,

Jiale Xu,

Wenbo Hu,

Xiaoyu Li,

Weihao Cheng,

Jinwei Gu,

Tianfan Xue,

Ying Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Lingen and Zhang, Zhaoyang and Li, Yaowei and Xu, Jiale and Hu, Wenbo and Li, Xiaoyu and Cheng, Weihao and Gu, Jinwei and Xue, Tianfan and Shan, Ying}, title = {NVComposer: Boosting Generative Novel View Synthesis with Multiple Sparse and Unposed Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {777-787} }
LeanGaussian: Breaking Pixel or Point Cloud Correspondence in Modeling 3D Gaussians: Jiamin Wu,

Kenkun Liu,

Han Gao,

Xiaoke Jiang,

Yuan Yao,

Lei Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR, author = {Wu, Jiamin and Liu, Kenkun and Gao, Han and Jiang, Xiaoke and Yao, Yuan and Zhang, Lei}, title = {LeanGaussian: Breaking Pixel or Point Cloud Correspondence in Modeling 3D Gaussians}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26641-26651} }
Modeling Multiple Normal Action Representations for Error Detection in Procedural Tasks: Wei-Jin Huang,

Yuan-Ming Li,

Zhi-Wei Xia,

Yu-Ming Tang,

Kun-Yu Lin,

Jian-Fang Hu,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR, author = {Huang, Wei-Jin and Li, Yuan-Ming and Xia, Zhi-Wei and Tang, Yu-Ming and Lin, Kun-Yu and Hu, Jian-Fang and Zheng, Wei-Shi}, title = {Modeling Multiple Normal Action Representations for Error Detection in Procedural Tasks}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27794-27804} }
Efficient Personalization of Quantized Diffusion Model without Backpropagation: Hoigi Seo,

Wongi Jeong,

Kyungryeol Lee,

Se Young Chun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seo_2025_CVPR, author = {Seo, Hoigi and Jeong, Wongi and Lee, Kyungryeol and Chun, Se Young}, title = {Efficient Personalization of Quantized Diffusion Model without Backpropagation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7717-7727} }
Alias-Free Latent Diffusion Models: Improving Fractional Shift Equivariance of Diffusion Latent Space: Yifan Zhou,

Zeqi Xiao,

Shuai Yang,

Xingang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR, author = {Zhou, Yifan and Xiao, Zeqi and Yang, Shuai and Pan, Xingang}, title = {Alias-Free Latent Diffusion Models: Improving Fractional Shift Equivariance of Diffusion Latent Space}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {34-44} }
A Unified Latent Schrodinger Bridge Diffusion Model for Unsupervised Anomaly Detection and Localization: Shilhora Akshay,

Niveditha Lakshmi Narasimhan,

Jacob George,

Vineeth N Balasubramanian; [pdf] [supp]
[bibtex]
@InProceedings{Akshay_2025_CVPR, author = {Akshay, Shilhora and Narasimhan, Niveditha Lakshmi and George, Jacob and Balasubramanian, Vineeth N}, title = {A Unified Latent Schrodinger Bridge Diffusion Model for Unsupervised Anomaly Detection and Localization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25528-25538} }
KVQ: Boosting Video Quality Assessment via Saliency-guided Local Perception: Yunpeng Qu,

Kun Yuan,

Qizhi Xie,

Ming Sun,

Chao Zhou,

Jian Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_CVPR, author = {Qu, Yunpeng and Yuan, Kun and Xie, Qizhi and Sun, Ming and Zhou, Chao and Wang, Jian}, title = {KVQ: Boosting Video Quality Assessment via Saliency-guided Local Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2150-2160} }
MambaVision: A Hybrid Mamba-Transformer Vision Backbone: Ali Hatamizadeh,

Jan Kautz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hatamizadeh_2025_CVPR, author = {Hatamizadeh, Ali and Kautz, Jan}, title = {MambaVision: A Hybrid Mamba-Transformer Vision Backbone}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25261-25270} }
Learning Flow Fields in Attention for Controllable Person Image Generation: Zijian Zhou,

Shikun Liu,

Xiao Han,

Haozhe Liu,

Kam Woh Ng,

Tian Xie,

Yuren Cong,

Hang Li,

Mengmeng Xu,

Juan-Manuel Perez-Rua,

Aditya Patel,

Tao Xiang,

Miaojing Shi,

Sen He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR, author = {Zhou, Zijian and Liu, Shikun and Han, Xiao and Liu, Haozhe and Ng, Kam Woh and Xie, Tian and Cong, Yuren and Li, Hang and Xu, Mengmeng and Perez-Rua, Juan-Manuel and Patel, Aditya and Xiang, Tao and Shi, Miaojing and He, Sen}, title = {Learning Flow Fields in Attention for Controllable Person Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2491-2501} }
Multi-Label Prototype Visual Spatial Search for Weakly Supervised Semantic Segmentation: Songsong Duan,

Xi Yang,

Nannan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2025_CVPR, author = {Duan, Songsong and Yang, Xi and Wang, Nannan}, title = {Multi-Label Prototype Visual Spatial Search for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30241-30250} }
Early-Bird Diffusion: Investigating and Leveraging Timestep-Aware Early-Bird Tickets in Diffusion Models for Efficient Training: Lexington Whalen,

Zhenbang Du,

Haoran You,

Chaojian Li,

Sixu Li,

Yingyan Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Whalen_2025_CVPR, author = {Whalen, Lexington and Du, Zhenbang and You, Haoran and Li, Chaojian and Li, Sixu and Lin, Yingyan}, title = {Early-Bird Diffusion: Investigating and Leveraging Timestep-Aware Early-Bird Tickets in Diffusion Models for Efficient Training}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7675-7684} }
FireEdit: Fine-grained Instruction-based Image Editing via Region-aware Vision Language Model: Jun Zhou,

Jiahao Li,

Zunnan Xu,

Hanhui Li,

Yiji Cheng,

Fa-Ting Hong,

Qin Lin,

Qinglin Lu,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR, author = {Zhou, Jun and Li, Jiahao and Xu, Zunnan and Li, Hanhui and Cheng, Yiji and Hong, Fa-Ting and Lin, Qin and Lu, Qinglin and Liang, Xiaodan}, title = {FireEdit: Fine-grained Instruction-based Image Editing via Region-aware Vision Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13093-13103} }
Doppelgangers++: Improved Visual Disambiguation with Geometric 3D Features: Yuanbo Xiangli,

Ruojin Cai,

Hanyu Chen,

Jeffrey Byrne,

Noah Snavely; [pdf]
[bibtex]
@InProceedings{Xiangli_2025_CVPR, author = {Xiangli, Yuanbo and Cai, Ruojin and Chen, Hanyu and Byrne, Jeffrey and Snavely, Noah}, title = {Doppelgangers++: Improved Visual Disambiguation with Geometric 3D Features}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27166-27175} }
Learnable Infinite Taylor Gaussian for Dynamic View Rendering: Bingbing Hu,

Yanyan Li,

Rui Xie,

Bo Xu,

Haoye Dong,

Junfeng Yao,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR, author = {Hu, Bingbing and Li, Yanyan and Xie, Rui and Xu, Bo and Dong, Haoye and Yao, Junfeng and Lee, Gim Hee}, title = {Learnable Infinite Taylor Gaussian for Dynamic View Rendering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26844-26854} }
DL2G: Degradation-guided Local-to-Global Restoration for Eyeglass Reflection Removal: Zhilv Yi,

Xiao Lu,

Hong Ding,

Jingbo Hu,

Zhi Jiang,

Chunxia Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Yi_2025_CVPR, author = {Yi, Zhilv and Lu, Xiao and Ding, Hong and Hu, Jingbo and Jiang, Zhi and Xiao, Chunxia}, title = {DL2G: Degradation-guided Local-to-Global Restoration for Eyeglass Reflection Removal}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16061-16070} }
DIV-FF: Dynamic Image-Video Feature Fields For Environment Understanding in Egocentric Videos: Lorenzo Mur-Labadia,

Josechu Guerrero,

Ruben Martinez-Cantin; [pdf] [supp]
[bibtex]
@InProceedings{Mur-Labadia_2025_CVPR, author = {Mur-Labadia, Lorenzo and Guerrero, Josechu and Martinez-Cantin, Ruben}, title = {DIV-FF: Dynamic Image-Video Feature Fields For Environment Understanding in Egocentric Videos}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3470-3480} }
SaMam: Style-aware State Space Model for Arbitrary Image Style Transfer: Hongda Liu,

Longguang Wang,

Ye Zhang,

Ziru Yu,

Yulan Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Hongda and Wang, Longguang and Zhang, Ye and Yu, Ziru and Guo, Yulan}, title = {SaMam: Style-aware State Space Model for Arbitrary Image Style Transfer}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28468-28478} }
MFogHub: Bridging Multi-Regional and Multi-Satellite Data for Global Marine Fog Detection and Forecasting: Mengqiu Xu,

Kaixin Chen,

Heng Guo,

Yixiang Huang,

Ming Wu,

Zhenwei Shi,

Chuang Zhang,

Jun Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR, author = {Xu, Mengqiu and Chen, Kaixin and Guo, Heng and Huang, Yixiang and Wu, Ming and Shi, Zhenwei and Zhang, Chuang and Guo, Jun}, title = {MFogHub: Bridging Multi-Regional and Multi-Satellite Data for Global Marine Fog Detection and Forecasting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12637-12646} }
The Illusion of Unlearning: The Unstable Nature of Machine Unlearning in Text-to-Image Diffusion Models: Naveen George,

Karthik Nandan Dasaraju,

Rutheesh Reddy Chittepu,

Konda Reddy Mopuri; [pdf] [supp]
[bibtex]
@InProceedings{George_2025_CVPR, author = {George, Naveen and Dasaraju, Karthik Nandan and Chittepu, Rutheesh Reddy and Mopuri, Konda Reddy}, title = {The Illusion of Unlearning: The Unstable Nature of Machine Unlearning in Text-to-Image Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13393-13402} }
Making Old Film Great Again: Degradation-aware State Space Model for Old Film Restoration: Yudong Mao,

Hao Luo,

Zhiwei Zhong,

Peilin Chen,

Zhijiang Zhang,

Shiqi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2025_CVPR, author = {Mao, Yudong and Luo, Hao and Zhong, Zhiwei and Chen, Peilin and Zhang, Zhijiang and Wang, Shiqi}, title = {Making Old Film Great Again: Degradation-aware State Space Model for Old Film Restoration}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28039-28049} }
Leveraging Global Stereo Consistency for Category-Level Shape and 6D Pose Estimation from Stereo Images: Junning Qiu,

Minglei Lu,

Fei Wang,

Yu Guo,

Yonggen Ling; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2025_CVPR, author = {Qiu, Junning and Lu, Minglei and Wang, Fei and Guo, Yu and Ling, Yonggen}, title = {Leveraging Global Stereo Consistency for Category-Level Shape and 6D Pose Estimation from Stereo Images}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {16839-16849} }
AlphaPre: Amplitude-Phase Disentanglement Model for Precipitation Nowcasting: Kenghong Lin,

Baoquan Zhang,

Demin Yu,

Wenzhi Feng,

Shidong Chen,

Feifan Gao,

Xutao Li,

Yunming Ye; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2025_CVPR, author = {Lin, Kenghong and Zhang, Baoquan and Yu, Demin and Feng, Wenzhi and Chen, Shidong and Gao, Feifan and Li, Xutao and Ye, Yunming}, title = {AlphaPre: Amplitude-Phase Disentanglement Model for Precipitation Nowcasting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17841-17850} }
EfficientLLaVA: Generalizable Auto-Pruning for Large Vision-language Models: Yinan Liang,

Ziwei Wang,

Xiuwei Xu,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR, author = {Liang, Yinan and Wang, Ziwei and Xu, Xiuwei and Zhou, Jie and Lu, Jiwen}, title = {EfficientLLaVA: Generalizable Auto-Pruning for Large Vision-language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {9445-9454} }
Detection-Friendly Nonuniformity Correction: A Union Framework for Infrared UAV Target Detection: Houzhang Fang,

Xiaolin Wang,

Zengyang Li,

Lu Wang,

Qingshan Li,

Yi Chang,

Luxin Yan; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_CVPR, author = {Fang, Houzhang and Wang, Xiaolin and Li, Zengyang and Wang, Lu and Li, Qingshan and Chang, Yi and Yan, Luxin}, title = {Detection-Friendly Nonuniformity Correction: A Union Framework for Infrared UAV Target Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {11898-11907} }
Articulated Kinematics Distillation from Video Diffusion Models: Xuan Li,

Qianli Ma,

Tsung-Yi Lin,

Yongxin Chen,

Chenfanfu Jiang,

Ming-Yu Liu,

Donglai Xiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Xuan and Ma, Qianli and Lin, Tsung-Yi and Chen, Yongxin and Jiang, Chenfanfu and Liu, Ming-Yu and Xiang, Donglai}, title = {Articulated Kinematics Distillation from Video Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {17571-17581} }
ExpertAF: Expert Actionable Feedback from Video: Kumar Ashutosh,

Tushar Nagarajan,

Georgios Pavlakos,

Kris Kitani,

Kristen Grauman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ashutosh_2025_CVPR, author = {Ashutosh, Kumar and Nagarajan, Tushar and Pavlakos, Georgios and Kitani, Kris and Grauman, Kristen}, title = {ExpertAF: Expert Actionable Feedback from Video}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13582-13594} }
MP-SfM: Monocular Surface Priors for Robust Structure-from-Motion: Zador Pataki,

Paul-Edouard Sarlin,

Johannes L. Schönberger,

Marc Pollefeys; [pdf] [supp]
[bibtex]
@InProceedings{Pataki_2025_CVPR, author = {Pataki, Zador and Sarlin, Paul-Edouard and Sch\"onberger, Johannes L. and Pollefeys, Marc}, title = {MP-SfM: Monocular Surface Priors for Robust Structure-from-Motion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21891-21901} }
OnlineAnySeg: Online Zero-Shot 3D Segmentation by Visual Foundation Model Guided 2D Mask Merging: Yijie Tang,

Jiazhao Zhang,

Yuqing Lan,

Yulan Guo,

Dezun Dong,

Chenyang Zhu,

Kai Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR, author = {Tang, Yijie and Zhang, Jiazhao and Lan, Yuqing and Guo, Yulan and Dong, Dezun and Zhu, Chenyang and Xu, Kai}, title = {OnlineAnySeg: Online Zero-Shot 3D Segmentation by Visual Foundation Model Guided 2D Mask Merging}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {3676-3685} }
Tora: Trajectory-oriented Diffusion Transformer for Video Generation: Zhenghao Zhang,

Junchao Liao,

Menghao Li,

ZuoZhuo Dai,

Bingxue Qiu,

Siyu Zhu,

Long Qin,

Weizhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zhenghao and Liao, Junchao and Li, Menghao and Dai, ZuoZhuo and Qiu, Bingxue and Zhu, Siyu and Qin, Long and Wang, Weizhi}, title = {Tora: Trajectory-oriented Diffusion Transformer for Video Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {2063-2073} }
Volumetrically Consistent 3D Gaussian Rasterization: Chinmay Talegaonkar,

Yash Belhe,

Ravi Ramamoorthi,

Nicholas Antipa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Talegaonkar_2025_CVPR, author = {Talegaonkar, Chinmay and Belhe, Yash and Ramamoorthi, Ravi and Antipa, Nicholas}, title = {Volumetrically Consistent 3D Gaussian Rasterization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {10953-10963} }
Deterministic-to-Stochastic Diverse Latent Feature Mapping for Human Motion Synthesis: Yu Hua,

Weiming Liu,

Gui Xu,

Yaqing Hou,

Yew-Soon Ong,

Qiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hua_2025_CVPR, author = {Hua, Yu and Liu, Weiming and Xu, Gui and Hou, Yaqing and Ong, Yew-Soon and Zhang, Qiang}, title = {Deterministic-to-Stochastic Diverse Latent Feature Mapping for Human Motion Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22724-22734} }
Morpheus: Text-Driven 3D Gaussian Splat Shape and Color Stylization: Jamie Wynn,

Zawar Qureshi,

Jakub Powierza,

Jamie Watson,

Mohamed Sayed; [pdf] [arXiv]
[bibtex]
@InProceedings{Wynn_2025_CVPR, author = {Wynn, Jamie and Qureshi, Zawar and Powierza, Jakub and Watson, Jamie and Sayed, Mohamed}, title = {Morpheus: Text-Driven 3D Gaussian Splat Shape and Color Stylization}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {7825-7836} }
CacheQuant: Comprehensively Accelerated Diffusion Models: Xuewen Liu,

Zhikai Li,

Qingyi Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Xuewen and Li, Zhikai and Gu, Qingyi}, title = {CacheQuant: Comprehensively Accelerated Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23269-23280} }
The Impact Label Noise and Choice of Threshold has on Cross-Entropy and Soft-Dice in Image Segmentation: Marcus Nordström,

Atsuto Maki,

Henrik Hult; [pdf] [supp]
[bibtex]
@InProceedings{Nordstrom_2025_CVPR, author = {Nordstr\"om, Marcus and Maki, Atsuto and Hult, Henrik}, title = {The Impact Label Noise and Choice of Threshold has on Cross-Entropy and Soft-Dice in Image Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20820-20829} }
Open-World Objectness Modeling Unifies Novel Object Detection: Shan Zhang,

Yao Ni,

Jinhao Du,

Yuan Xue,

Philip Torr,

Piotr Koniusz,

Anton van den Hengel; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Shan and Ni, Yao and Du, Jinhao and Xue, Yuan and Torr, Philip and Koniusz, Piotr and van den Hengel, Anton}, title = {Open-World Objectness Modeling Unifies Novel Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30332-30342} }
LLaVA-Critic: Learning to Evaluate Multimodal Models: Tianyi Xiong,

Xiyao Wang,

Dong Guo,

Qinghao Ye,

Haoqi Fan,

Quanquan Gu,

Heng Huang,

Chunyuan Li; [pdf] [supp]
[bibtex]
@InProceedings{Xiong_2025_CVPR, author = {Xiong, Tianyi and Wang, Xiyao and Guo, Dong and Ye, Qinghao and Fan, Haoqi and Gu, Quanquan and Huang, Heng and Li, Chunyuan}, title = {LLaVA-Critic: Learning to Evaluate Multimodal Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {13618-13628} }
VILA-M3: Enhancing Vision-Language Models with Medical Expert Knowledge: Vishwesh Nath,

Wenqi Li,

Dong Yang,

Andriy Myronenko,

Mingxin Zheng,

Yao Lu,

Zhijian Liu,

Hongxu Yin,

Yee Man Law,

Yucheng Tang,

Pengfei Guo,

Can Zhao,

Ziyue Xu,

Yufan He,

Stephanie Harmon,

Benjamin Simon,

Greg Heinrich,

Stephen Aylward,

Marc Edgar,

Michael Zephyr,

Pavlo Molchanov,

Baris Turkbey,

Holger Roth,

Daguang Xu; [pdf] [supp]
[bibtex]
@InProceedings{Nath_2025_CVPR, author = {Nath, Vishwesh and Li, Wenqi and Yang, Dong and Myronenko, Andriy and Zheng, Mingxin and Lu, Yao and Liu, Zhijian and Yin, Hongxu and Law, Yee Man and Tang, Yucheng and Guo, Pengfei and Zhao, Can and Xu, Ziyue and He, Yufan and Harmon, Stephanie and Simon, Benjamin and Heinrich, Greg and Aylward, Stephen and Edgar, Marc and Zephyr, Michael and Molchanov, Pavlo and Turkbey, Baris and Roth, Holger and Xu, Daguang}, title = {VILA-M3: Enhancing Vision-Language Models with Medical Expert Knowledge}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {14788-14798} }
Repurposing Pre-trained Video Diffusion Models for Event-based Video Interpolation: Jingxi Chen,

Brandon Y. Feng,

Haoming Cai,

Tianfu Wang,

Levi Burner,

Dehao Yuan,

Cornelia Fermuller,

Christopher A. Metzler,

Yiannis Aloimonos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR, author = {Chen, Jingxi and Feng, Brandon Y. and Cai, Haoming and Wang, Tianfu and Burner, Levi and Yuan, Dehao and Fermuller, Cornelia and Metzler, Christopher A. and Aloimonos, Yiannis}, title = {Repurposing Pre-trained Video Diffusion Models for Event-based Video Interpolation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {12456-12466} }
MotionPRO: Exploring the Role of Pressure in Human MoCap and Beyond: Shenghao Ren,

Yi Lu,

Jiayi Huang,

Jiayi Zhao,

He Zhang,

Tao Yu,

Qiu Shen,

Xun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_CVPR, author = {Ren, Shenghao and Lu, Yi and Huang, Jiayi and Zhao, Jiayi and Zhang, He and Yu, Tao and Shen, Qiu and Cao, Xun}, title = {MotionPRO: Exploring the Role of Pressure in Human MoCap and Beyond}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27760-27770} }
DiffVsgg: Diffusion-Driven Online Video Scene Graph Generation: Mu Chen,

Liulei Li,

Wenguan Wang,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR, author = {Chen, Mu and Li, Liulei and Wang, Wenguan and Yang, Yi}, title = {DiffVsgg: Diffusion-Driven Online Video Scene Graph Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29161-29172} }
Large-scale Multi-view Tensor Clustering with Implicit Linear Kernels: Jiyuan Liu,

Xinwang Liu,

Chuankun Li,

Xinhang Wan,

Hao Tan,

Yi Zhang,

Weixuan Liang,

Qian Qu,

Yu Feng,

Renxiang Guan,

Ke Liang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Jiyuan and Liu, Xinwang and Li, Chuankun and Wan, Xinhang and Tan, Hao and Zhang, Yi and Liang, Weixuan and Qu, Qian and Feng, Yu and Guan, Renxiang and Liang, Ke}, title = {Large-scale Multi-view Tensor Clustering with Implicit Linear Kernels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {20727-20736} }
Generalized Diffusion Detector: Mining Robust Features from Diffusion Models for Domain-Generalized Detection: Boyong He,

Yuxiang Ji,

Qianwen Ye,

Zhuoyue Tan,

Liaoni Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Boyong and Ji, Yuxiang and Ye, Qianwen and Tan, Zhuoyue and Wu, Liaoni},
  title     = {Generalized Diffusion Detector: Mining Robust Features from Diffusion Models for Domain-Generalized Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {9921--9932},
}
Q-Eval-100K: Evaluating Visual Quality and Alignment Level for Text-to-Vision Content: Zicheng Zhang,

Tengchuan Kou,

Shushi Wang,

Chunyi Li,

Wei Sun,

Wei Wang,

Xiaoyu Li,

Zongyu Wang,

Xuezhi Cao,

Xiongkuo Min,

Xiaohong Liu,

Guangtao Zhai; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zicheng and Kou, Tengchuan and Wang, Shushi and Li, Chunyi and Sun, Wei and Wang, Wei and Li, Xiaoyu and Wang, Zongyu and Cao, Xuezhi and Min, Xiongkuo and Liu, Xiaohong and Zhai, Guangtao},
  title     = {{Q-Eval-100K}: Evaluating Visual Quality and Alignment Level for Text-to-Vision Content},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {10621--10631},
}
Dual Focus-Attention Transformer for Robust Point Cloud Registration: Kexue Fu,

Mingzhi Yuan,

Changwei Wang,

Weiguang Pang,

Jing Chi,

Manning Wang,

Longxiang Gao; [pdf]
[bibtex]
@InProceedings{Fu_2025_CVPR,
  author    = {Fu, Kexue and Yuan, Mingzhi and Wang, Changwei and Pang, Weiguang and Chi, Jing and Wang, Manning and Gao, Longxiang},
  title     = {Dual Focus-Attention Transformer for Robust Point Cloud Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11769--11778},
}
Forming Auxiliary High-confident Instance-level Loss to Promote Learning from Label Proportions: Tianhao Ma,

Han Chen,

Juncheng Hu,

Yungang Zhu,

Ximing Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Tianhao and Chen, Han and Hu, Juncheng and Zhu, Yungang and Li, Ximing},
  title     = {Forming Auxiliary High-confident Instance-level Loss to Promote Learning from Label Proportions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20592--20601},
}
Progress-Aware Video Frame Captioning: Zihui Xue,

Joungbin An,

Xitong Yang,

Kristen Grauman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2025_CVPR,
  author    = {Xue, Zihui and An, Joungbin and Yang, Xitong and Grauman, Kristen},
  title     = {Progress-Aware Video Frame Captioning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13639--13650},
}
SMTPD: A New Benchmark for Temporal Prediction of Social Media Popularity: Yijie Xu,

Bolun Zheng,

Wei Zhu,

Hangjia Pan,

Yuchen Yao,

Ning Xu,

Anan Liu,

Quan Zhang,

Chenggang Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Yijie and Zheng, Bolun and Zhu, Wei and Pan, Hangjia and Yao, Yuchen and Xu, Ning and Liu, Anan and Zhang, Quan and Yan, Chenggang},
  title     = {{SMTPD}: A New Benchmark for Temporal Prediction of Social Media Popularity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18847--18857},
}
Enhancing Dance-to-Music Generation via Negative Conditioning Latent Diffusion Model: Changchang Sun,

Gaowen Liu,

Charles Fleming,

Yan Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Changchang and Liu, Gaowen and Fleming, Charles and Yan, Yan},
  title     = {Enhancing Dance-to-Music Generation via Negative Conditioning Latent Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8321--8330},
}
Neuro-Symbolic Evaluation of Text-to-Video Models using Formal Verification: S P Sharan,

Minkyu Choi,

Sahil Shah,

Harsh Goel,

Mohammad Omama,

Sandeep Chinchali; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sharan_2025_CVPR,
  author    = {Sharan, S P and Choi, Minkyu and Shah, Sahil and Goel, Harsh and Omama, Mohammad and Chinchali, Sandeep},
  title     = {Neuro-Symbolic Evaluation of Text-to-Video Models using Formal Verification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8395--8405},
}
Spherical Manifold Guided Diffusion Model for Panoramic Image Generation: Xiancheng Sun,

Mai Xu,

Shengxi Li,

Senmao Ma,

Xin Deng,

Lai Jiang,

Gang Shen; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Xiancheng and Xu, Mai and Li, Shengxi and Ma, Senmao and Deng, Xin and Jiang, Lai and Shen, Gang},
  title     = {Spherical Manifold Guided Diffusion Model for Panoramic Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5824--5834},
}
Learning on Model Weights using Tree Experts: Eliahu Horwitz,

Bar Cavia,

Jonathan Kahana,

Yedid Hoshen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Horwitz_2025_CVPR,
  author    = {Horwitz, Eliahu and Cavia, Bar and Kahana, Jonathan and Hoshen, Yedid},
  title     = {Learning on Model Weights using Tree Experts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20468--20478},
}
Rethinking Query-based Transformer for Continual Image Segmentation: Yuchen Zhu,

Cheng Shi,

Dingyou Wang,

Jiajin Tang,

Zhengxuan Wei,

Yu Wu,

Guanbin Li,

Sibei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Yuchen and Shi, Cheng and Wang, Dingyou and Tang, Jiajin and Wei, Zhengxuan and Wu, Yu and Li, Guanbin and Yang, Sibei},
  title     = {Rethinking Query-based Transformer for Continual Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {4595--4606},
}
Image Reconstruction from Readout-Multiplexed Single-Photon Detector Arrays: Shashwath Bharadwaj,

Ruangrawee Kitichotkul,

Akshay Agarwal,

Vivek K Goyal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bharadwaj_2025_CVPR,
  author    = {Bharadwaj, Shashwath and Kitichotkul, Ruangrawee and Agarwal, Akshay and Goyal, Vivek K},
  title     = {Image Reconstruction from Readout-Multiplexed Single-Photon Detector Arrays},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11406--11415},
}
Towards Smart Point-and-Shoot Photography: Jiawan Li,

Fei Zhou,

Zhipeng Zhong,

Jiongzhi Lin,

Guoping Qiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jiawan and Zhou, Fei and Zhong, Zhipeng and Lin, Jiongzhi and Qiu, Guoping},
  title     = {Towards Smart Point-and-Shoot Photography},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28242--28251},
}
SlideChat: A Large Vision-Language Assistant for Whole-Slide Pathology Image Understanding: Ying Chen,

Guoan Wang,

Yuanfeng Ji,

Yanjun Li,

Jin Ye,

Tianbin Li,

Ming Hu,

Rongshan Yu,

Yu Qiao,

Junjun He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Ying and Wang, Guoan and Ji, Yuanfeng and Li, Yanjun and Ye, Jin and Li, Tianbin and Hu, Ming and Yu, Rongshan and Qiao, Yu and He, Junjun},
  title     = {{SlideChat}: A Large Vision-Language Assistant for Whole-Slide Pathology Image Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {5134--5143},
}
Prototype-Based Image Prompting for Weakly Supervised Histopathological Image Segmentation: Qingchen Tang,

Lei Fan,

Maurice Pagnucco,

Yang Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Qingchen and Fan, Lei and Pagnucco, Maurice and Song, Yang},
  title     = {Prototype-Based Image Prompting for Weakly Supervised Histopathological Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30271--30280},
}
Towards Transformer-Based Aligned Generation with Self-Coherence Guidance: Shulei Wang,

Wang Lin,

Hai Huang,

Hanting Wang,

Sihang Cai,

WenKang Han,

Tao Jin,

Jingyuan Chen,

Jiacheng Sun,

Jieming Zhu,

Zhou Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shulei and Lin, Wang and Huang, Hai and Wang, Hanting and Cai, Sihang and Han, WenKang and Jin, Tao and Chen, Jingyuan and Sun, Jiacheng and Zhu, Jieming and Zhao, Zhou},
  title     = {Towards Transformer-Based Aligned Generation with Self-Coherence Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {18455--18464},
}
Accurate Scene Text Recognition with Efficient Model Scaling and Cloze Self-Distillation: Andrea Maracani,

Savas Ozkan,

Sijun Cho,

Hyowon Kim,

Eunchung Noh,

Jeongwon Min,

Cho Jung Min,

Dookun Park,

Mete Ozay; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Maracani_2025_CVPR,
  author    = {Maracani, Andrea and Ozkan, Savas and Cho, Sijun and Kim, Hyowon and Noh, Eunchung and Min, Jeongwon and Min, Cho Jung and Park, Dookun and Ozay, Mete},
  title     = {Accurate Scene Text Recognition with Efficient Model Scaling and Cloze Self-Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14516--14526},
}
DART: Disease-aware Image-Text Alignment and Self-correcting Re-alignment for Trustworthy Radiology Report Generation: Sang-Jun Park,

Keun-Soo Heo,

Dong-Hee Shin,

Young-Han Son,

Ji-Hye Oh,

Tae-Eui Kam; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Sang-Jun and Heo, Keun-Soo and Shin, Dong-Hee and Son, Young-Han and Oh, Ji-Hye and Kam, Tae-Eui},
  title     = {{DART}: Disease-aware Image-Text Alignment and Self-correcting Re-alignment for Trustworthy Radiology Report Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15580--15589},
}
On the Consistency of Video Large Language Models in Temporal Comprehension: Minjoon Jung,

Junbin Xiao,

Byoung-Tak Zhang,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2025_CVPR,
  author    = {Jung, Minjoon and Xiao, Junbin and Zhang, Byoung-Tak and Yao, Angela},
  title     = {On the Consistency of Video Large Language Models in Temporal Comprehension},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {13713--13722},
}
Mitigating the Human-Robot Domain Discrepancy in Visual Pre-training for Robotic Manipulation: Jiaming Zhou,

Teli Ma,

Kun-Yu Lin,

Zifan Wang,

Ronghe Qiu,

Junwei Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Jiaming and Ma, Teli and Lin, Kun-Yu and Wang, Zifan and Qiu, Ronghe and Liang, Junwei},
  title     = {Mitigating the Human-Robot Domain Discrepancy in Visual Pre-training for Robotic Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22551--22561},
}
Less is More: Efficient Model Merging with Binary Task Switch: Biqing Qi,

Fangyuan Li,

Zhen Wang,

Junqi Gao,

Dong Li,

Peng Ye,

Bowen Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qi_2025_CVPR,
  author    = {Qi, Biqing and Li, Fangyuan and Wang, Zhen and Gao, Junqi and Li, Dong and Ye, Peng and Zhou, Bowen},
  title     = {Less is More: Efficient Model Merging with Binary Task Switch},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15265--15274},
}
One-Minute Video Generation with Test-Time Training: Karan Dalal,

Daniel Koceja,

Jiarui Xu,

Yue Zhao,

Shihao Han,

Ka Chun Cheung,

Jan Kautz,

Yejin Choi,

Yu Sun,

Xiaolong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dalal_2025_CVPR,
  author    = {Dalal, Karan and Koceja, Daniel and Xu, Jiarui and Zhao, Yue and Han, Shihao and Cheung, Ka Chun and Kautz, Jan and Choi, Yejin and Sun, Yu and Wang, Xiaolong},
  title     = {One-Minute Video Generation with Test-Time Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17702--17711},
}
InteractionMap: Improving Online Vectorized HDMap Construction with Interaction: Kuang Wu,

Chuan Yang,

Zhanbin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Kuang and Yang, Chuan and Li, Zhanbin},
  title     = {{InteractionMap}: Improving Online Vectorized {HDMap} Construction with Interaction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {17176--17186},
}
Text-guided Sparse Voxel Pruning for Efficient 3D Visual Grounding: Wenxuan Guo,

Xiuwei Xu,

Ziwei Wang,

Jianjiang Feng,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Wenxuan and Xu, Xiuwei and Wang, Ziwei and Feng, Jianjiang and Zhou, Jie and Lu, Jiwen},
  title     = {Text-guided Sparse Voxel Pruning for Efficient {3D} Visual Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3666--3675},
}
ROCKET-1: Mastering Open-World Interaction with Visual-Temporal Context Prompting: Shaofei Cai,

Zihao Wang,

Kewei Lian,

Zhancun Mu,

Xiaojian Ma,

Anji Liu,

Yitao Liang; [pdf]
[bibtex]
@InProceedings{Cai_2025_CVPR,
  author    = {Cai, Shaofei and Wang, Zihao and Lian, Kewei and Mu, Zhancun and Ma, Xiaojian and Liu, Anji and Liang, Yitao},
  title     = {{ROCKET-1}: Mastering Open-World Interaction with Visual-Temporal Context Prompting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {12122--12131},
}
Common3D: Self-Supervised Learning of 3D Morphable Models for Common Objects in Neural Feature Space: Leonhard Sommer,

Olaf Dünkel,

Christian Theobalt,

Adam Kortylewski; [pdf] [supp]
[bibtex]
@InProceedings{Sommer_2025_CVPR,
  author    = {Sommer, Leonhard and D{\"u}nkel, Olaf and Theobalt, Christian and Kortylewski, Adam},
  title     = {{Common3D}: Self-Supervised Learning of {3D} Morphable Models for Common Objects in Neural Feature Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {6468--6479},
}
RLAIF-V: Open-Source AI Feedback Leads to Super GPT-4V Trustworthiness: Tianyu Yu,

Haoye Zhang,

Qiming Li,

Qixin Xu,

Yuan Yao,

Da Chen,

Xiaoman Lu,

Ganqu Cui,

Yunkai Dang,

Taiwen He,

Xiaocheng Feng,

Jun Song,

Bo Zheng,

Zhiyuan Liu,

Tat-Seng Chua,

Maosong Sun; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Tianyu and Zhang, Haoye and Li, Qiming and Xu, Qixin and Yao, Yuan and Chen, Da and Lu, Xiaoman and Cui, Ganqu and Dang, Yunkai and He, Taiwen and Feng, Xiaocheng and Song, Jun and Zheng, Bo and Liu, Zhiyuan and Chua, Tat-Seng and Sun, Maosong},
  title     = {{RLAIF-V}: Open-Source {AI} Feedback Leads to Super {GPT-4V} Trustworthiness},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19985--19995},
}
ECVC: Exploiting Non-Local Correlations in Multiple Frames for Contextual Video Compression: Wei Jiang,

Junru Li,

Kai Zhang,

Li Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Wei and Li, Junru and Zhang, Kai and Zhang, Li},
  title     = {{ECVC}: Exploiting Non-Local Correlations in Multiple Frames for Contextual Video Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {7331--7341},
}
LinGen: Towards High-Resolution Minute-Length Text-to-Video Generation with Linear Computational Complexity: Hongjie Wang,

Chih-Yao Ma,

Yen-Cheng Liu,

Ji Hou,

Tao Xu,

Jialiang Wang,

Felix Juefei-Xu,

Yaqiao Luo,

Peizhao Zhang,

Tingbo Hou,

Peter Vajda,

Niraj K. Jha,

Xiaoliang Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Hongjie and Ma, Chih-Yao and Liu, Yen-Cheng and Hou, Ji and Xu, Tao and Wang, Jialiang and Juefei-Xu, Felix and Luo, Yaqiao and Zhang, Peizhao and Hou, Tingbo and Vajda, Peter and Jha, Niraj K. and Dai, Xiaoliang},
  title     = {{LinGen}: Towards High-Resolution Minute-Length Text-to-Video Generation with Linear Computational Complexity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2578--2588},
}
EditSplat: Multi-View Fusion and Attention-Guided Optimization for View-Consistent 3D Scene Editing with 3D Gaussian Splatting: Dong In Lee,

Hyeongcheol Park,

Jiyoung Seo,

Eunbyung Park,

Hyunje Park,

Ha Dam Baek,

Sangheon Shin,

Sangmin Kim,

Sangpil Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Dong In and Park, Hyeongcheol and Seo, Jiyoung and Park, Eunbyung and Park, Hyunje and Baek, Ha Dam and Shin, Sangheon and Kim, Sangmin and Kim, Sangpil},
  title     = {{EditSplat}: Multi-View Fusion and Attention-Guided Optimization for View-Consistent {3D} Scene Editing with {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {11135--11145},
}
SpatialCLIP: Learning 3D-aware Image Representations from Spatially Discriminative Language: Zehan Wang,

Sashuai Zhou,

Shaoxuan He,

Haifeng Huang,

Lihe Yang,

Ziang Zhang,

Xize Cheng,

Shengpeng Ji,

Tao Jin,

Hengshuang Zhao,

Zhou Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zehan and Zhou, Sashuai and He, Shaoxuan and Huang, Haifeng and Yang, Lihe and Zhang, Ziang and Cheng, Xize and Ji, Shengpeng and Jin, Tao and Zhao, Hengshuang and Zhao, Zhou},
  title     = {{SpatialCLIP}: Learning {3D}-aware Image Representations from Spatially Discriminative Language},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29656--29666},
}
Mono2Stereo: A Benchmark and Empirical Study for Stereo Conversion: Songsong Yu,

Yuxin Chen,

Zhongang Qi,

Zeke Xie,

Yifan Wang,

Lijun Wang,

Ying Shan,

Huchuan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Songsong and Chen, Yuxin and Qi, Zhongang and Xie, Zeke and Wang, Yifan and Wang, Lijun and Shan, Ying and Lu, Huchuan},
  title     = {{Mono2Stereo}: A Benchmark and Empirical Study for Stereo Conversion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21847--21856},
}
Towards Open-Vocabulary Audio-Visual Event Localization: Jinxing Zhou,

Dan Guo,

Ruohao Guo,

Yuxin Mao,

Jingjing Hu,

Yiran Zhong,

Xiaojun Chang,

Meng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Jinxing and Guo, Dan and Guo, Ruohao and Mao, Yuxin and Hu, Jingjing and Zhong, Yiran and Chang, Xiaojun and Wang, Meng},
  title     = {Towards Open-Vocabulary Audio-Visual Event Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8362--8371},
}
One-shot 3D Object Canonicalization based on Geometric and Semantic Consistency: Li Jin,

Yujie Wang,

Wenzheng Chen,

Qiyu Dai,

Qingzhe Gao,

Xueying Qin,

Baoquan Chen; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2025_CVPR,
  author    = {Jin, Li and Wang, Yujie and Chen, Wenzheng and Dai, Qiyu and Gao, Qingzhe and Qin, Xueying and Chen, Baoquan},
  title     = {One-shot {3D} Object Canonicalization based on Geometric and Semantic Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {16850--16859},
}
Inst3D-LMM: Instance-Aware 3D Scene Understanding with Multi-modal Instruction Tuning: Hanxun Yu,

Wentong Li,

Song Wang,

Junbo Chen,

Jianke Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Hanxun and Li, Wentong and Wang, Song and Chen, Junbo and Zhu, Jianke},
  title     = {{Inst3D-LMM}: Instance-Aware {3D} Scene Understanding with Multi-modal Instruction Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {14147--14157},
}
S2Gaussian: Sparse-View Super-Resolution 3D Gaussian Splatting: Yecong Wan,

Mingwen Shao,

Yuanshuo Cheng,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wan_2025_CVPR,
  author    = {Wan, Yecong and Shao, Mingwen and Cheng, Yuanshuo and Zuo, Wangmeng},
  title     = {{S2Gaussian}: Sparse-View Super-Resolution {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {711--721},
}
HIIF: Hierarchical Encoding based Implicit Image Function for Continuous Super-resolution: Yuxuan Jiang,

Ho Man Kwan,

Tianhao Peng,

Ge Gao,

Fan Zhang,

Xiaoqing Zhu,

Joel Sole,

David Bull; [pdf] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Yuxuan and Kwan, Ho Man and Peng, Tianhao and Gao, Ge and Zhang, Fan and Zhu, Xiaoqing and Sole, Joel and Bull, David},
  title     = {{HIIF}: Hierarchical Encoding based Implicit Image Function for Continuous Super-resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {2289--2299},
}
Motion Prompting: Controlling Video Generation with Motion Trajectories: Daniel Geng,

Charles Herrmann,

Junhwa Hur,

Forrester Cole,

Serena Zhang,

Tobias Pfaff,

Tatiana Lopez-Guevara,

Yusuf Aytar,

Michael Rubinstein,

Chen Sun,

Oliver Wang,

Andrew Owens,

Deqing Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Geng_2025_CVPR,
  author    = {Geng, Daniel and Herrmann, Charles and Hur, Junhwa and Cole, Forrester and Zhang, Serena and Pfaff, Tobias and Lopez-Guevara, Tatiana and Aytar, Yusuf and Rubinstein, Michael and Sun, Chen and Wang, Oliver and Owens, Andrew and Sun, Deqing},
  title     = {Motion Prompting: Controlling Video Generation with Motion Trajectories},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {1--12},
}
VERA: Explainable Video Anomaly Detection via Verbalized Learning of Vision-Language Models: Muchao Ye,

Weiyang Liu,

Pan He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Muchao and Liu, Weiyang and He, Pan},
  title     = {{VERA}: Explainable Video Anomaly Detection via Verbalized Learning of Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {8679--8688},
}
ProHOC: Probabilistic Hierarchical Out-of-Distribution Classification via Multi-Depth Networks: Erik Wallin,

Fredrik Kahl,

Lars Hammarstrand; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wallin_2025_CVPR,
  author    = {Wallin, Erik and Kahl, Fredrik and Hammarstrand, Lars},
  title     = {{ProHOC}: Probabilistic Hierarchical Out-of-Distribution Classification via Multi-Depth Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20612--20621},
}
CCIN: Compositional Conflict Identification and Neutralization for Composed Image Retrieval: Likai Tian,

Jian Zhao,

Zechao Hu,

Zhengwei Yang,

Hao Li,

Lei Jin,

Zheng Wang,

Xuelong Li; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Likai and Zhao, Jian and Hu, Zechao and Yang, Zhengwei and Li, Hao and Jin, Lei and Wang, Zheng and Li, Xuelong},
  title     = {{CCIN}: Compositional Conflict Identification and Neutralization for Composed Image Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {3974--3983},
}
CLIP is Strong Enough to Fight Back: Test-time Counterattacks towards Zero-shot Adversarial Robustness of CLIP: Songlong Xing,

Zhengyu Zhao,

Nicu Sebe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Songlong and Zhao, Zhengyu and Sebe, Nicu},
  title     = {{CLIP} is Strong Enough to Fight Back: Test-time Counterattacks towards Zero-shot Adversarial Robustness of {CLIP}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {15172--15182},
}
OverLoCK: An Overview-first-Look-Closely-next ConvNet with Context-Mixing Dynamic Kernels: Meng Lou,

Yizhou Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lou_2025_CVPR,
  author    = {Lou, Meng and Yu, Yizhou},
  title     = {{OverLoCK}: An Overview-first-Look-Closely-next {ConvNet} with Context-Mixing Dynamic Kernels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {128--138},
}
SoftShadow: Leveraging Soft Masks for Penumbra-Aware Shadow Removal: Xinrui Wang,

Lanqing Guo,

Xiyu Wang,

Siyu Huang,

Bihan Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xinrui and Guo, Lanqing and Wang, Xiyu and Huang, Siyu and Wen, Bihan},
  title     = {{SoftShadow}: Leveraging Soft Masks for Penumbra-Aware Shadow Removal},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23217--23226},
}
Graph-Embedded Structure-Aware Perceptual Hashing for Neural Network Protection and Piracy Detection: Ruiheng Liu,

Haozhe Chen,

Boyao Zhao,

Kejiang Chen,

Weiming Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Ruiheng and Chen, Haozhe and Zhao, Boyao and Chen, Kejiang and Zhang, Weiming},
  title     = {Graph-Embedded Structure-Aware Perceptual Hashing for Neural Network Protection and Piracy Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20169--20178},
}
VTON-HandFit: Virtual Try-on for Arbitrary Hand Pose Guided by Hand Priors Embedding: Yujie Liang,

Xiaobin Hu,

Boyuan Jiang,

Donghao Luo,

Xu Peng,

Kai Wu,

Chengming Xu,

Wenhui Han,

Taisong Jin,

Chengjie Wang,

Rongrong Ji; [pdf]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Yujie and Hu, Xiaobin and Jiang, Boyuan and Luo, Donghao and Peng, Xu and Wu, Kai and Xu, Chengming and Han, Wenhui and Jin, Taisong and Wang, Chengjie and Ji, Rongrong},
  title     = {{VTON-HandFit}: Virtual Try-on for Arbitrary Hand Pose Guided by Hand Priors Embedding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22616--22626},
}
Interleaved-Modal Chain-of-Thought: Jun Gao,

Yongqi Li,

Ziqiang Cao,

Wenjie Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Jun and Li, Yongqi and Cao, Ziqiang and Li, Wenjie},
  title     = {Interleaved-Modal Chain-of-Thought},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19520--19529},
}
Uni-Renderer: Unifying Rendering and Inverse Rendering Via Dual Stream Diffusion: Zhifei Chen,

Tianshuo Xu,

Wenhang Ge,

Leyi Wu,

Dongyu Yan,

Jing He,

Luozhou Wang,

Lu Zeng,

Shunsi Zhang,

Ying-Cong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zhifei and Xu, Tianshuo and Ge, Wenhang and Wu, Leyi and Yan, Dongyu and He, Jing and Wang, Luozhou and Zeng, Lu and Zhang, Shunsi and Chen, Ying-Cong},
  title     = {{Uni-Renderer}: Unifying Rendering and Inverse Rendering Via Dual Stream Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26504--26513},
}
Enhancing Adversarial Transferability with Checkpoints of a Single Model's Training: Shixin Li,

Chaoxiang He,

Xiaojing Ma,

Bin Benjamin Zhu,

Shuo Wang,

Hongsheng Hu,

Dongmei Zhang,

Linchen Yu; [pdf]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Shixin and He, Chaoxiang and Ma, Xiaojing and Zhu, Bin Benjamin and Wang, Shuo and Hu, Hongsheng and Zhang, Dongmei and Yu, Linchen},
  title     = {Enhancing Adversarial Transferability with Checkpoints of a Single Model's Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20685--20694},
}
POSTA: A Go-to Framework for Customized Artistic Poster Generation: Haoyu Chen,

Xiaojie Xu,

Wenbo Li,

Jingjing Ren,

Tian Ye,

Songhua Liu,

Ying-Cong Chen,

Lei Zhu,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Haoyu and Xu, Xiaojie and Li, Wenbo and Ren, Jingjing and Ye, Tian and Liu, Songhua and Chen, Ying-Cong and Zhu, Lei and Wang, Xinchao},
  title     = {{POSTA}: A Go-to Framework for Customized Artistic Poster Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28694--28704},
}
NSD-Imagery: A Benchmark Dataset for Extending fMRI Vision Decoding Methods to Mental Imagery: Reese Kneeland,

Paul S. Scotti,

Ghislain St-Yves,

Jesse Breedlove,

Kendrick Kay,

Thomas Naselaris; [pdf] [supp]
[bibtex]
@InProceedings{Kneeland_2025_CVPR,
  author    = {Kneeland, Reese and Scotti, Paul S. and St-Yves, Ghislain and Breedlove, Jesse and Kay, Kendrick and Naselaris, Thomas},
  title     = {{NSD-Imagery}: A Benchmark Dataset for Extending {fMRI} Vision Decoding Methods to Mental Imagery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28852--28862},
}
VLsI: Verbalized Layers-to-Interactions from Large to Small Vision Language Models: Byung-Kwan Lee,

Ryo Hachiuma,

Yu-Chiang Frank Wang,

Yong Man Ro,

Yueh-Hua Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Byung-Kwan and Hachiuma, Ryo and Wang, Yu-Chiang Frank and Ro, Yong Man and Wu, Yueh-Hua},
  title     = {{VLsI}: Verbalized Layers-to-Interactions from Large to Small Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29545--29557},
}
O-TPT: Orthogonality Constraints for Calibrating Test-time Prompt Tuning in Vision-Language Models: Ashshak Sharifdeen,

Muhammad Akhtar Munir,

Sanoojan Baliah,

Salman Khan,

Muhammad Haris Khan; [pdf] [supp]
[bibtex]
@InProceedings{Sharifdeen_2025_CVPR,
  author    = {Sharifdeen, Ashshak and Munir, Muhammad Akhtar and Baliah, Sanoojan and Khan, Salman and Khan, Muhammad Haris},
  title     = {{O-TPT}: Orthogonality Constraints for Calibrating Test-time Prompt Tuning in Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {19942--19951},
}
Just Dance with pi! A Poly-modal Inductor for Weakly-supervised Video Anomaly Detection: Snehashis Majhi,

Giacomo D'Amicantonio,

Antitza Dantcheva,

Quan Kong,

Lorenzo Garattoni,

Gianpiero Francesca,

Egor Bondarev,

Francois Bremond; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Majhi_2025_CVPR,
  author    = {Majhi, Snehashis and D'Amicantonio, Giacomo and Dantcheva, Antitza and Kong, Quan and Garattoni, Lorenzo and Francesca, Gianpiero and Bondarev, Egor and Bremond, Francois},
  title     = {Just Dance with pi! A Poly-modal Inductor for Weakly-supervised Video Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24265--24274},
}
Flash3D: Super-scaling Point Transformers through Joint Hardware-Geometry Locality: Liyan Chen,

Gregory P. Meyer,

Zaiwei Zhang,

Eric M. Wolff,

Paul Vernaza; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Liyan and Meyer, Gregory P. and Zhang, Zaiwei and Wolff, Eric M. and Vernaza, Paul},
  title     = {{Flash3D}: Super-scaling Point Transformers through Joint Hardware-Geometry Locality},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6595--6604},
}
Analyzing the Synthetic-to-Real Domain Gap in 3D Hand Pose Estimation: Zhuoran Zhao,

Linlin Yang,

Pengzhan Sun,

Pan Hui,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Zhuoran and Yang, Linlin and Sun, Pengzhan and Hui, Pan and Yao, Angela},
  title     = {Analyzing the Synthetic-to-Real Domain Gap in {3D} Hand Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12255--12265},
}
Feature4X: Bridging Any Monocular Video to 4D Agentic AI with Versatile Gaussian Feature Fields: Shijie Zhou,

Hui Ren,

Yijia Weng,

Shuwang Zhang,

Zhen Wang,

Dejia Xu,

Zhiwen Fan,

Suya You,

Zhangyang Wang,

Leonidas Guibas,

Achuta Kadambi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Shijie and Ren, Hui and Weng, Yijia and Zhang, Shuwang and Wang, Zhen and Xu, Dejia and Fan, Zhiwen and You, Suya and Wang, Zhangyang and Guibas, Leonidas and Kadambi, Achuta},
  title     = {{Feature4X}: Bridging Any Monocular Video to {4D} Agentic {AI} with Versatile {Gaussian} Feature Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14179--14190},
}
Comprehensive Relighting: Generalizable and Consistent Monocular Human Relighting and Harmonization: Junying Wang,

Jingyuan Liu,

Xin Sun,

Krishna Kumar Singh,

Zhixin Shu,

He Zhang,

Jimei Yang,

Nanxuan Zhao,

Tuanfeng Y. Wang,

Simon S. Chen,

Ulrich Neumann,

Jae Shin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Junying and Liu, Jingyuan and Sun, Xin and Singh, Krishna Kumar and Shu, Zhixin and Zhang, He and Yang, Jimei and Zhao, Nanxuan and Wang, Tuanfeng Y. and Chen, Simon S. and Neumann, Ulrich and Yoon, Jae Shin},
  title     = {Comprehensive Relighting: Generalizable and Consistent Monocular Human Relighting and Harmonization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {380--390},
}
Hyperspectral Pansharpening via Diffusion Models with Iteratively Zero-Shot Guidance: Jin-Liang Xiao,

Ting-Zhu Huang,

Liang-Jian Deng,

Guang Lin,

Zihan Cao,

Chao Li,

Qibin Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Jin-Liang and Huang, Ting-Zhu and Deng, Liang-Jian and Lin, Guang and Cao, Zihan and Li, Chao and Zhao, Qibin},
  title     = {Hyperspectral Pansharpening via Diffusion Models with Iteratively Zero-Shot Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12669--12678},
}
EASEMVC: Efficient Dual Selection Mechanism for Deep Multi-View Clustering: Baili Xiao,

Zhibin Dong,

Ke Liang,

Suyuan Liu,

Siwei Wang,

Tianrui Liu,

Xingchen Hu,

En Zhu,

Xinwang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Baili and Dong, Zhibin and Liang, Ke and Liu, Suyuan and Wang, Siwei and Liu, Tianrui and Hu, Xingchen and Zhu, En and Liu, Xinwang},
  title     = {{EASEMVC}: Efficient Dual Selection Mechanism for Deep Multi-View Clustering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20716--20726},
}
Efficient Motion-Aware Video MLLM: Zijia Zhao,

Yuqi Huo,

Tongtian Yue,

Longteng Guo,

Haoyu Lu,

Bingning Wang,

Weipeng Chen,

Jing Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Zijia and Huo, Yuqi and Yue, Tongtian and Guo, Longteng and Lu, Haoyu and Wang, Bingning and Chen, Weipeng and Liu, Jing},
  title     = {Efficient Motion-Aware Video {MLLM}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24159--24168},
}
DSPNet: Dual-vision Scene Perception for Robust 3D Question Answering: Jingzhou Luo,

Yang Liu,

Weixing Chen,

Zhen Li,

Yaowei Wang,

Guanbin Li,

Liang Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Jingzhou and Liu, Yang and Chen, Weixing and Li, Zhen and Wang, Yaowei and Li, Guanbin and Lin, Liang},
  title     = {{DSPNet}: Dual-vision Scene Perception for Robust {3D} Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14169--14178},
}
Zero-Shot 4D Lidar Panoptic Segmentation: Yushan Zhang,

Aljoša Ošep,

Laura Leal-Taixé,

Tim Meinhardt; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yushan and O{\v{s}}ep, Aljo{\v{s}}a and Leal-Taix{\'e}, Laura and Meinhardt, Tim},
  title     = {Zero-Shot {4D} Lidar Panoptic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24506--24517},
}
MAtCha Gaussians: Atlas of Charts for High-Quality Geometry and Photorealism From Sparse Views: Antoine Guedon,

Tomoki Ichikawa,

Kohei Yamashita,

Ko Nishino; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guedon_2025_CVPR,
  author    = {Guedon, Antoine and Ichikawa, Tomoki and Yamashita, Kohei and Nishino, Ko},
  title     = {{MAtCha} {Gaussians}: Atlas of Charts for High-Quality Geometry and Photorealism From Sparse Views},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6001--6011},
}
Extreme Rotation Estimation in the Wild: Hana Bezalel,

Dotan Ankri,

Ruojin Cai,

Hadar Averbach-Elor; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bezalel_2025_CVPR,
  author    = {Bezalel, Hana and Ankri, Dotan and Cai, Ruojin and Averbach-Elor, Hadar},
  title     = {Extreme Rotation Estimation in the Wild},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1061--1070},
}
ADU: Adaptive Detection of Unknown Categories in Black-Box Domain Adaptation: Yushan Lai,

Guowen Li,

Haoyuan Liang,

Juepeng Zheng,

Zhiyu Ye; [pdf] [supp]
[bibtex]
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Yushan and Li, Guowen and Liang, Haoyuan and Zheng, Juepeng and Ye, Zhiyu},
  title     = {{ADU}: Adaptive Detection of Unknown Categories in Black-Box Domain Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30588--30598},
}
EmotiveTalk: Expressive Talking Head Generation through Audio Information Decoupling and Emotional Video Diffusion: Haotian Wang,

Yuzhe Weng,

Yueyan Li,

Zilu Guo,

Jun Du,

Shutong Niu,

Jiefeng Ma,

Shan He,

Xiaoyan Wu,

Qiming Hu,

Bing Yin,

Cong Liu,

Qingfeng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Haotian and Weng, Yuzhe and Li, Yueyan and Guo, Zilu and Du, Jun and Niu, Shutong and Ma, Jiefeng and He, Shan and Wu, Xiaoyan and Hu, Qiming and Yin, Bing and Liu, Cong and Liu, Qingfeng},
  title     = {{EmotiveTalk}: Expressive Talking Head Generation through Audio Information Decoupling and Emotional Video Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26212--26221},
}
Traversing Distortion-Perception Tradeoff using a Single Score-Based Generative Model: Yuhan Wang,

Suzhi Bi,

Ying-Jun Angela Zhang,

Xiaojun Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuhan and Bi, Suzhi and Zhang, Ying-Jun Angela and Yuan, Xiaojun},
  title     = {Traversing Distortion-Perception Tradeoff using a Single Score-Based Generative Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2377--2386},
}
IceDiff: High Resolution and High-Quality Arctic Sea Ice Forecasting with Generative Diffusion Prior: Jingyi Xu,

Siwei Tu,

Weidong Yang,

Ben Fei,

Shuhao Li,

Keyi Liu,

Yeqi Luo,

Lipeng Ma,

Lei Bai; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Jingyi and Tu, Siwei and Yang, Weidong and Fei, Ben and Li, Shuhao and Liu, Keyi and Luo, Yeqi and Ma, Lipeng and Bai, Lei},
  title     = {{IceDiff}: High Resolution and High-Quality {Arctic} Sea Ice Forecasting with Generative Diffusion Prior},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10567--10576},
}
DTOS: Dynamic Time Object Sensing with Large Multimodal Model: Jirui Tian,

Jinrong Zhang,

Shenglan Liu,

Luhao Xu,

Zhixiong Huang,

Gao Huang; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Jirui and Zhang, Jinrong and Liu, Shenglan and Xu, Luhao and Huang, Zhixiong and Huang, Gao},
  title     = {{DTOS}: Dynamic Time Object Sensing with Large Multimodal Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13810--13820},
}
How to Merge Your Multimodal Models Over Time?: Sebastian Dziadzio,

Vishaal Udandarao,

Karsten Roth,

Ameya Prabhu,

Zeynep Akata,

Samuel Albanie,

Matthias Bethge; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dziadzio_2025_CVPR,
  author    = {Dziadzio, Sebastian and Udandarao, Vishaal and Roth, Karsten and Prabhu, Ameya and Akata, Zeynep and Albanie, Samuel and Bethge, Matthias},
  title     = {How to Merge Your Multimodal Models Over Time?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20479--20491},
}
Identifying and Mitigating Position Bias of Multi-image Vision-Language Models: Xinyu Tian,

Shu Zou,

Zhaoyuan Yang,

Jing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Xinyu and Zou, Shu and Yang, Zhaoyuan and Zhang, Jing},
  title     = {Identifying and Mitigating Position Bias of Multi-image Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10599--10609},
}
Unsupervised Foundation Model-Agnostic Slide-Level Representation Learning: Tim Lenz,

Peter Neidlinger,

Marta Ligero,

Georg Wölflein,

Marko van Treeck,

Jakob N. Kather; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lenz_2025_CVPR,
  author    = {Lenz, Tim and Neidlinger, Peter and Ligero, Marta and W{\"o}lflein, Georg and van Treeck, Marko and Kather, Jakob N.},
  title     = {Unsupervised Foundation Model-Agnostic Slide-Level Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30807--30817},
}
Exploring CLIP's Dense Knowledge for Weakly Supervised Semantic Segmentation: Zhiwei Yang,

Yucong Meng,

Kexue Fu,

Feilong Tang,

Shuo Wang,

Zhijian Song; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Zhiwei and Meng, Yucong and Fu, Kexue and Tang, Feilong and Wang, Shuo and Song, Zhijian},
  title     = {Exploring {CLIP}'s Dense Knowledge for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20223--20232},
}
UNIALIGN: Scaling Multimodal Alignment within One Unified Model: Bo Zhou,

Liulei Li,

Yujia Wang,

Huafeng Liu,

Yazhou Yao,

Wenguan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Bo and Li, Liulei and Wang, Yujia and Liu, Huafeng and Yao, Yazhou and Wang, Wenguan},
  title     = {{UNIALIGN}: Scaling Multimodal Alignment within One Unified Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29644--29655},
}
ShowHowTo: Generating Scene-Conditioned Step-by-Step Visual Instructions: Tomáš Souček,

Prajwal Gatti,

Michael Wray,

Ivan Laptev,

Dima Damen,

Josef Sivic; [pdf] [supp]
[bibtex]
@InProceedings{Soucek_2025_CVPR,
  author    = {Sou{\v{c}}ek, Tom{\'a}{\v{s}} and Gatti, Prajwal and Wray, Michael and Laptev, Ivan and Damen, Dima and Sivic, Josef},
  title     = {{ShowHowTo}: Generating Scene-Conditioned Step-by-Step Visual Instructions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27435--27445},
}
Exploration-Driven Generative Interactive Environments: Nedko Savov,

Naser Kazemi,

Mohammad Mahdi,

Danda Pani Paudel,

Xi Wang,

Luc Van Gool; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Savov_2025_CVPR,
  author    = {Savov, Nedko and Kazemi, Naser and Mahdi, Mohammad and Paudel, Danda Pani and Wang, Xi and Van Gool, Luc},
  title     = {Exploration-Driven Generative Interactive Environments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27597--27607},
}
Task-Agnostic Guided Feature Expansion for Class-Incremental Learning: Bowen Zheng,

Da-Wei Zhou,

Han-Jia Ye,

De-Chuan Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Bowen and Zhou, Da-Wei and Ye, Han-Jia and Zhan, De-Chuan},
  title     = {Task-Agnostic Guided Feature Expansion for Class-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10099--10109},
}
ShowUI: One Vision-Language-Action Model for GUI Visual Agent: Kevin Qinghong Lin,

Linjie Li,

Difei Gao,

Zhengyuan Yang,

Shiwei Wu,

Zechen Bai,

Stan Weixian Lei,

Lijuan Wang,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Kevin Qinghong and Li, Linjie and Gao, Difei and Yang, Zhengyuan and Wu, Shiwei and Bai, Zechen and Lei, Stan Weixian and Wang, Lijuan and Shou, Mike Zheng},
  title     = {{ShowUI}: One Vision-Language-Action Model for {GUI} Visual Agent},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19498--19508},
}
Let's Chorus: Partner-aware Hybrid Song-Driven 3D Head Animation: Xiumei Xie,

Zikai Huang,

Wenhao Xu,

Peng Xiao,

Xuemiao Xu,

Huaidong Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Xiumei and Huang, Zikai and Xu, Wenhao and Xiao, Peng and Xu, Xuemiao and Zhang, Huaidong},
  title     = {Let's Chorus: Partner-aware Hybrid Song-Driven {3D} Head Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5467--5476},
}
Twinner: Shining Light on Digital Twins in a Few Snaps: Jesus Zarzar,

Tom Monnier,

Roman Shapovalov,

Andrea Vedaldi,

David Novotny; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zarzar_2025_CVPR,
  author    = {Zarzar, Jesus and Monnier, Tom and Shapovalov, Roman and Vedaldi, Andrea and Novotny, David},
  title     = {Twinner: Shining Light on Digital Twins in a Few Snaps},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5859--5869},
}
Infinity: Scaling Bitwise AutoRegressive Modeling for High-Resolution Image Synthesis: Jian Han,

Jinlai Liu,

Yi Jiang,

Bin Yan,

Yuqi Zhang,

Zehuan Yuan,

Bingyue Peng,

Xiaobing Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Jian and Liu, Jinlai and Jiang, Yi and Yan, Bin and Zhang, Yuqi and Yuan, Zehuan and Peng, Bingyue and Liu, Xiaobing},
  title     = {Infinity: Scaling Bitwise {AutoRegressive} Modeling for High-Resolution Image Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15733--15744},
}
DreamText: High Fidelity Scene Text Synthesis: Yibin Wang,

Weizhong Zhang,

Honghui Xu,

Cheng Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yibin and Zhang, Weizhong and Xu, Honghui and Jin, Cheng},
  title     = {{DreamText}: High Fidelity Scene Text Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28555--28563},
}
MonoPlace3D: Learning 3D-Aware Object Placement for 3D Monocular Detection: Rishubh Parihar,

Srinjay Sarkar,

Sarthak Vora,

Jogendra Nath Kundu,

R. Venkatesh Babu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parihar_2025_CVPR,
  author    = {Parihar, Rishubh and Sarkar, Srinjay and Vora, Sarthak and Kundu, Jogendra Nath and Babu, R. Venkatesh},
  title     = {{MonoPlace3D}: Learning {3D}-Aware Object Placement for {3D} Monocular Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6531--6541},
}
HumanDreamer: Generating Controllable Human-Motion Videos via Decoupled Generation: Boyuan Wang,

Xiaofeng Wang,

Chaojun Ni,

Guosheng Zhao,

Zhiqin Yang,

Zheng Zhu,

Muyang Zhang,

Yukun Zhou,

Xinze Chen,

Guan Huang,

Lihong Liu,

Xingang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Boyuan and Wang, Xiaofeng and Ni, Chaojun and Zhao, Guosheng and Yang, Zhiqin and Zhu, Zheng and Zhang, Muyang and Zhou, Yukun and Chen, Xinze and Huang, Guan and Liu, Lihong and Wang, Xingang},
  title     = {{HumanDreamer}: Generating Controllable Human-Motion Videos via Decoupled Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {12391--12401},
}
ReVisionLLM: Recursive Vision-Language Model for Temporal Grounding in Hour-Long Videos: Tanveer Hannan,

Md Mohaiminul Islam,

Jindong Gu,

Thomas Seidl,

Gedas Bertasius; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hannan_2025_CVPR,
  author    = {Hannan, Tanveer and Islam, Md Mohaiminul and Gu, Jindong and Seidl, Thomas and Bertasius, Gedas},
  title     = {{ReVisionLLM}: Recursive Vision-Language Model for Temporal Grounding in Hour-Long Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19012--19022},
}
ArtiFade: Learning to Generate High-quality Subject from Blemished Images: Shuya Yang,

Shaozhe Hao,

Yukang Cao,

Kwan-Yee K. Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Shuya and Hao, Shaozhe and Cao, Yukang and Wong, Kwan-Yee K.},
  title     = {{ArtiFade}: Learning to Generate High-quality Subject from Blemished Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {13167--13177},
}
SGCR: Spherical Gaussians for Efficient 3D Curve Reconstruction: Xinran Yang,

Donghao Ji,

Yuanqi Li,

Jie Guo,

Yanwen Guo,

Junyuan Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Xinran and Ji, Donghao and Li, Yuanqi and Guo, Jie and Guo, Yanwen and Xie, Junyuan},
  title     = {{SGCR}: Spherical {Gaussians} for Efficient {3D} Curve Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5793--5803},
}
Prompting Depth Anything for 4K Resolution Accurate Metric Depth Estimation: Haotong Lin,

Sida Peng,

Jingxiao Chen,

Songyou Peng,

Jiaming Sun,

Minghuan Liu,

Hujun Bao,

Jiashi Feng,

Xiaowei Zhou,

Bingyi Kang; [pdf] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Haotong and Peng, Sida and Chen, Jingxiao and Peng, Songyou and Sun, Jiaming and Liu, Minghuan and Bao, Hujun and Feng, Jiashi and Zhou, Xiaowei and Kang, Bingyi},
  title     = {Prompting {Depth Anything} for {4K} Resolution Accurate Metric Depth Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17070--17080},
}
ProKeR: A Kernel Perspective on Few-Shot Adaptation of Large Vision-Language Models: Yassir Bendou,

Amine Ouasfi,

Vincent Gripon,

Adnane Boukhayma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bendou_2025_CVPR,
  author    = {Bendou, Yassir and Ouasfi, Amine and Gripon, Vincent and Boukhayma, Adnane},
  title     = {{ProKeR}: A Kernel Perspective on Few-Shot Adaptation of Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25092--25102},
}
Advancing Semantic Future Prediction through Multimodal Visual Sequence Transformers: Efstathios Karypidis,

Ioannis Kakogeorgiou,

Spyros Gidaris,

Nikos Komodakis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karypidis_2025_CVPR,
  author    = {Karypidis, Efstathios and Kakogeorgiou, Ioannis and Gidaris, Spyros and Komodakis, Nikos},
  title     = {Advancing Semantic Future Prediction through Multimodal Visual Sequence Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3793--3803},
}
GET: Unlocking the Multi-modal Potential of CLIP for Generalized Category Discovery: Enguang Wang,

Zhimao Peng,

Zhengyuan Xie,

Fei Yang,

Xialei Liu,

Ming-Ming Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Enguang and Peng, Zhimao and Xie, Zhengyuan and Yang, Fei and Liu, Xialei and Cheng, Ming-Ming},
  title     = {{GET}: Unlocking the Multi-modal Potential of {CLIP} for Generalized Category Discovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20296--20306},
}
On the Out-Of-Distribution Generalization of Large Multimodal Models: Xingxuan Zhang,

Jiansheng Li,

Wenjing Chu,

Junjia Hai,

Renzhe Xu,

Yuqing Yang,

Shikai Guan,

Jiazheng Xu,

Liping Jing,

Peng Cui; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xingxuan and Li, Jiansheng and Chu, Wenjing and Hai, Junjia and Xu, Renzhe and Yang, Yuqing and Guan, Shikai and Xu, Jiazheng and Jing, Liping and Cui, Peng},
  title     = {On the Out-Of-Distribution Generalization of Large Multimodal Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10315--10326},
}
Enhanced Contrastive Learning with Multi-view Longitudinal Data for Chest X-ray Report Generation: Kang Liu,

Zhuoqi Ma,

Xiaolu Kang,

Yunan Li,

Kun Xie,

Zhicheng Jiao,

Qiguang Miao; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Kang and Ma, Zhuoqi and Kang, Xiaolu and Li, Yunan and Xie, Kun and Jiao, Zhicheng and Miao, Qiguang},
  title     = {Enhanced Contrastive Learning with Multi-view Longitudinal Data for Chest {X-ray} Report Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {10348--10359},
}
MonoTAKD: Teaching Assistant Knowledge Distillation for Monocular 3D Object Detection: Hou-I Liu,

Christine Wu,

Jen-Hao Cheng,

Wenhao Chai,

Shian-Yun Wang,

Gaowen Liu,

Hugo Latapie,

Jhih-Ciang Wu,

Jenq-Neng Hwang,

Hong-Han Shuai,

Wen-Huang Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Hou-I and Wu, Christine and Cheng, Jen-Hao and Chai, Wenhao and Wang, Shian-Yun and Liu, Gaowen and Latapie, Hugo and Wu, Jhih-Ciang and Hwang, Jenq-Neng and Shuai, Hong-Han and Cheng, Wen-Huang},
  title     = {{MonoTAKD}: Teaching Assistant Knowledge Distillation for Monocular {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22266--22275},
}
Test-Time Domain Generalization via Universe Learning: A Multi-Graph Matching Approach for Medical Image Segmentation: Xingguo Lv,

Xingbo Dong,

Liwen Wang,

Jiewen Yang,

Lei Zhao,

Bin Pu,

Zhe Jin,

Xuejun Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2025_CVPR,
  author    = {Lv, Xingguo and Dong, Xingbo and Wang, Liwen and Yang, Jiewen and Zhao, Lei and Pu, Bin and Jin, Zhe and Li, Xuejun},
  title     = {Test-Time Domain Generalization via Universe Learning: A Multi-Graph Matching Approach for Medical Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {15621--15631},
}
Easy-editable Image Vectorization with Multi-layer Multi-scale Distributed Visual Feature Embedding: Ye Chen,

Zhangli Hu,

Zhongyin Zhao,

Yupeng Zhu,

Yue Shi,

Yuxuan Xiong,

Bingbing Ni; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Ye and Hu, Zhangli and Zhao, Zhongyin and Zhu, Yupeng and Shi, Yue and Xiong, Yuxuan and Ni, Bingbing},
  title     = {Easy-editable Image Vectorization with Multi-layer Multi-scale Distributed Visual Feature Embedding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23345--23354},
}
Acquire and then Adapt: Squeezing out Text-to-Image Model for Image Restoration: Junyuan Deng,

Xinyi Wu,

Yongxing Yang,

Congchao Zhu,

Song Wang,

Zhenyao Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Junyuan and Wu, Xinyi and Yang, Yongxing and Zhu, Congchao and Wang, Song and Wu, Zhenyao},
  title     = {Acquire and then Adapt: Squeezing out Text-to-Image Model for Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23195--23206},
}
DeDe: Detecting Backdoor Samples for SSL Encoders via Decoders: Sizai Hou,

Songze Li,

Duanyi Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hou_2025_CVPR,
  author    = {Hou, Sizai and Li, Songze and Yao, Duanyi},
  title     = {{DeDe}: Detecting Backdoor Samples for {SSL} Encoders via Decoders},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20675--20684},
}
Towards Scalable Human-aligned Benchmark for Text-guided Image Editing: Suho Ryu,

Kihyun Kim,

Eugene Baek,

Dongsoo Shin,

Joonseok Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ryu_2025_CVPR,
  author    = {Ryu, Suho and Kim, Kihyun and Baek, Eugene and Shin, Dongsoo and Lee, Joonseok},
  title     = {Towards Scalable Human-aligned Benchmark for Text-guided Image Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18292--18301},
}
Devils in Middle Layers of Large Vision-Language Models: Interpreting, Detecting and Mitigating Object Hallucinations via Attention Lens: Zhangqi Jiang,

Junkai Chen,

Beier Zhu,

Tingjin Luo,

Yankun Shen,

Xu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Zhangqi and Chen, Junkai and Zhu, Beier and Luo, Tingjin and Shen, Yankun and Yang, Xu},
  title     = {Devils in Middle Layers of Large Vision-Language Models: Interpreting, Detecting and Mitigating Object Hallucinations via Attention Lens},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25004--25014},
}
SpectroMotion: Dynamic 3D Reconstruction of Specular Scenes: Cheng-De Fan,

Chen-Wei Chang,

Yi-Ruei Liu,

Jie-Ying Lee,

Jiun-Long Huang,

Yu-Chee Tseng,

Yu-Lun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Cheng-De and Chang, Chen-Wei and Liu, Yi-Ruei and Lee, Jie-Ying and Huang, Jiun-Long and Tseng, Yu-Chee and Liu, Yu-Lun},
  title     = {{SpectroMotion}: Dynamic {3D} Reconstruction of Specular Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21328--21338},
}
Scaling Inference Time Compute for Diffusion Models: Nanye Ma,

Shangyuan Tong,

Haolin Jia,

Hexiang Hu,

Yu-Chuan Su,

Mingda Zhang,

Xuan Yang,

Yandong Li,

Tommi Jaakkola,

Xuhui Jia,

Saining Xie; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Nanye and Tong, Shangyuan and Jia, Haolin and Hu, Hexiang and Su, Yu-Chuan and Zhang, Mingda and Yang, Xuan and Li, Yandong and Jaakkola, Tommi and Jia, Xuhui and Xie, Saining},
  title     = {Scaling Inference Time Compute for Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2523--2534},
}
Coeff-Tuning: A Graph Filter Subspace View for Tuning Attention-Based Large Models: Zichen Miao,

Wei Chen,

Qiang Qiu; [pdf] [supp]
[bibtex]
@InProceedings{Miao_2025_CVPR,
  author    = {Miao, Zichen and Chen, Wei and Qiu, Qiang},
  title     = {{Coeff-Tuning}: A Graph Filter Subspace View for Tuning Attention-Based Large Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20146--20157},
}
VTON 360: High-Fidelity Virtual Try-On from Any Viewing Direction: Zijian He,

Yuwei Ning,

Yipeng Qin,

Guangrun Wang,

Sibei Yang,

Liang Lin,

Guanbin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Zijian and Ning, Yuwei and Qin, Yipeng and Wang, Guangrun and Yang, Sibei and Lin, Liang and Li, Guanbin},
  title     = {{VTON} 360: High-Fidelity Virtual Try-On from Any Viewing Direction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26388--26398},
}
MVBoost: Boost 3D Reconstruction with Multi-View Refinement: Xiangyu Liu,

Xiaomei Zhang,

Zhiyuan Ma,

Xiangyu Zhu,

Zhen Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xiangyu and Zhang, Xiaomei and Ma, Zhiyuan and Zhu, Xiangyu and Lei, Zhen},
  title     = {{MVBoost}: Boost {3D} Reconstruction with Multi-View Refinement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21664--21673},
}
Chat-based Person Retrieval via Dialogue-Refined Cross-Modal Alignment: Yang Bai,

Yucheng Ji,

Min Cao,

Jinqiao Wang,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2025_CVPR,
  author    = {Bai, Yang and Ji, Yucheng and Cao, Min and Wang, Jinqiao and Ye, Mang},
  title     = {Chat-based Person Retrieval via Dialogue-Refined Cross-Modal Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3952--3962},
}
Category-Agnostic Neural Object Rigging: Guangzhao He,

Chen Geng,

Shangzhe Wu,

Jiajun Wu; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Guangzhao and Geng, Chen and Wu, Shangzhe and Wu, Jiajun},
  title     = {Category-Agnostic Neural Object Rigging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22078--22088},
}
AVF-MAE++: Scaling Affective Video Facial Masked Autoencoders via Efficient Audio-Visual Self-Supervised Learning: Xuecheng Wu,

Heli Sun,

Yifan Wang,

Jiayu Nie,

Jie Zhang,

Yabing Wang,

Junxiao Xue,

Liang He; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Xuecheng and Sun, Heli and Wang, Yifan and Nie, Jiayu and Zhang, Jie and Wang, Yabing and Xue, Junxiao and He, Liang},
  title     = {{AVF-MAE++}: Scaling Affective Video Facial Masked Autoencoders via Efficient Audio-Visual Self-Supervised Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9142--9153}
}
POPEN: Preference-Based Optimization and Ensemble for LVLM-Based Reasoning Segmentation: Lanyun Zhu,

Tianrun Chen,

Qianxiong Xu,

Xuanyi Liu,

Deyi Ji,

Haiyang Wu,

De Wen Soh,

Jun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Lanyun and Chen, Tianrun and Xu, Qianxiong and Liu, Xuanyi and Ji, Deyi and Wu, Haiyang and Soh, De Wen and Liu, Jun},
  title     = {{POPEN}: Preference-Based Optimization and Ensemble for {LVLM}-Based Reasoning Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30231--30240}
}
DiffusionSfM: Predicting Structure and Motion via Ray Origin and Endpoint Diffusion: Qitao Zhao,

Amy Lin,

Jeff Tan,

Jason Y. Zhang,

Deva Ramanan,

Shubham Tulsiani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Qitao and Lin, Amy and Tan, Jeff and Zhang, Jason Y. and Ramanan, Deva and Tulsiani, Shubham},
  title     = {{DiffusionSfM}: Predicting Structure and Motion via Ray Origin and Endpoint Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6317--6326}
}
GroundingFace: Fine-grained Face Understanding via Pixel Grounding Multimodal Large Language Model: Yue Han,

Jiangning Zhang,

Junwei Zhu,

Runze Hou,

Xiaozhong Ji,

Chuming Lin,

Xiaobin Hu,

Zhucun Xue,

Yong Liu; [pdf]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Yue and Zhang, Jiangning and Zhu, Junwei and Hou, Runze and Ji, Xiaozhong and Lin, Chuming and Hu, Xiaobin and Xue, Zhucun and Liu, Yong},
  title     = {{GroundingFace}: Fine-grained Face Understanding via Pixel Grounding Multimodal Large Language Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3942--3951}
}
Self-Supervised Cross-View Correspondence with Predictive Cycle Consistency: Alan Baade,

Changan Chen; [pdf] [supp]
[bibtex]
@InProceedings{Baade_2025_CVPR,
  author    = {Baade, Alan and Chen, Changan},
  title     = {Self-Supervised Cross-View Correspondence with Predictive Cycle Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16753--16763}
}
MMAudio: Taming Multimodal Joint Training for High-Quality Video-to-Audio Synthesis: Ho Kei Cheng,

Masato Ishii,

Akio Hayakawa,

Takashi Shibuya,

Alexander Schwing,

Yuki Mitsufuji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Ho Kei and Ishii, Masato and Hayakawa, Akio and Shibuya, Takashi and Schwing, Alexander and Mitsufuji, Yuki},
  title     = {{MMAudio}: Taming Multimodal Joint Training for High-Quality Video-to-Audio Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28901--28911}
}
CryptoFace: End-to-End Encrypted Face Recognition: Wei Ao,

Vishnu Naresh Boddeti; [pdf]
[bibtex]
@InProceedings{Ao_2025_CVPR,
  author    = {Ao, Wei and Boddeti, Vishnu Naresh},
  title     = {{CryptoFace}: End-to-End Encrypted Face Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19197--19206}
}
Relation-Rich Visual Document Generator for Visual Information Extraction: Zi-Han Jiang,

Chien-Wei Lin,

Wei-Hua Li,

Hsuan-Tung Liu,

Yi-Ren Yeh,

Chu-Song Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Zi-Han and Lin, Chien-Wei and Li, Wei-Hua and Liu, Hsuan-Tung and Yeh, Yi-Ren and Chen, Chu-Song},
  title     = {Relation-Rich Visual Document Generator for Visual Information Extraction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14449--14459}
}
DynFocus: Dynamic Cooperative Network Empowers LLMs with Video Understanding: Yudong Han,

Qingpei Guo,

Liyuan Pan,

Liu Liu,

Yu Guan,

Ming Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Yudong and Guo, Qingpei and Pan, Liyuan and Liu, Liu and Guan, Yu and Yang, Ming},
  title     = {{DynFocus}: Dynamic Cooperative Network Empowers {LLMs} with Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8512--8522}
}
Mimic In-Context Learning for Multimodal Tasks: Yuchu Jiang,

Jiale Fu,

Chenduo Hao,

Xinting Hu,

Yingzhe Peng,

Xin Geng,

Xu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Yuchu and Fu, Jiale and Hao, Chenduo and Hu, Xinting and Peng, Yingzhe and Geng, Xin and Yang, Xu},
  title     = {Mimic In-Context Learning for Multimodal Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29825--29835}
}
PromptHash: Affinity-Prompted Collaborative Cross-Modal Learning for Adaptive Hashing Retrieval: Qiang Zou,

Shuli Cheng,

Jiayi Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Zou_2025_CVPR,
  author    = {Zou, Qiang and Cheng, Shuli and Chen, Jiayi},
  title     = {{PromptHash}: Affinity-Prompted Collaborative Cross-Modal Learning for Adaptive Hashing Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19649--19658}
}
V-Stylist: Video Stylization via Collaboration and Reflection of MLLM Agents: Zhengrong Yue,

Shaobin Zhuang,

Kunchang Li,

Yanbo Ding,

Yali Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yue_2025_CVPR,
  author    = {Yue, Zhengrong and Zhuang, Shaobin and Li, Kunchang and Ding, Yanbo and Wang, Yali},
  title     = {{V-Stylist}: Video Stylization via Collaboration and Reflection of {MLLM} Agents},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3195--3205}
}
Vision-Language Models Do Not Understand Negation: Kumail Alhamoud,

Shaden Alshammari,

Yonglong Tian,

Guohao Li,

Philip H.S. Torr,

Yoon Kim,

Marzyeh Ghassemi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alhamoud_2025_CVPR,
  author    = {Alhamoud, Kumail and Alshammari, Shaden and Tian, Yonglong and Li, Guohao and Torr, Philip H. S. and Kim, Yoon and Ghassemi, Marzyeh},
  title     = {Vision-Language Models Do Not Understand Negation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29612--29622}
}
ID-Patch: Robust ID Association for Group Photo Personalization: Yimeng Zhang,

Tiancheng Zhi,

Jing Liu,

Shen Sang,

Liming Jiang,

Qing Yan,

Sijia Liu,

Linjie Luo; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yimeng and Zhi, Tiancheng and Liu, Jing and Sang, Shen and Jiang, Liming and Yan, Qing and Liu, Sijia and Luo, Linjie},
  title     = {{ID-Patch}: Robust {ID} Association for Group Photo Personalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2986--2996}
}
iG-6DoF: Model-free 6DoF Pose Estimation for Unseen Object via Iterative 3D Gaussian Splatting: Tuo Cao,

Fei Luo,

Jiongming Qin,

Yu Jiang,

Yusen Wang,

Chunxia Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2025_CVPR,
  author    = {Cao, Tuo and Luo, Fei and Qin, Jiongming and Jiang, Yu and Wang, Yusen and Xiao, Chunxia},
  title     = {{iG-6DoF}: Model-free {6DoF} Pose Estimation for Unseen Object via Iterative {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6436--6446}
}
NexusGS: Sparse View Synthesis with Epipolar Depth Priors in 3D Gaussian Splatting: Yulong Zheng,

Zicheng Jiang,

Shengfeng He,

Yandu Sun,

Junyu Dong,

Huaidong Zhang,

Yong Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Yulong and Jiang, Zicheng and He, Shengfeng and Sun, Yandu and Dong, Junyu and Zhang, Huaidong and Du, Yong},
  title     = {{NexusGS}: Sparse View Synthesis with Epipolar Depth Priors in {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26800--26809}
}
HyperNet Fields: Efficiently Training Hypernetworks without Ground Truth by Learning Weight Trajectories: Eric Hedlin,

Munawar Hayat,

Fatih Porikli,

Kwang Moo Yi,

Shweta Mahajan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hedlin_2025_CVPR,
  author    = {Hedlin, Eric and Hayat, Munawar and Porikli, Fatih and Yi, Kwang Moo and Mahajan, Shweta},
  title     = {{HyperNet} Fields: Efficiently Training Hypernetworks without Ground Truth by Learning Weight Trajectories},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22129--22138}
}
Universal Scene Graph Generation: Shengqiong Wu,

Hao Fei,

Tat-seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Shengqiong and Fei, Hao and Chua, Tat-seng},
  title     = {Universal Scene Graph Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14158--14168}
}
RICCARDO: Radar Hit Prediction and Convolution for Camera-Radar 3D Object Detection: Yunfei Long,

Abhinav Kumar,

Xiaoming Liu,

Daniel Morris; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Long_2025_CVPR,
  author    = {Long, Yunfei and Kumar, Abhinav and Liu, Xiaoming and Morris, Daniel},
  title     = {{RICCARDO}: Radar Hit Prediction and Convolution for Camera-Radar {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22276--22285}
}
ForestLPR: LiDAR Place Recognition in Forests Attentioning Multiple BEV Density Images: Yanqing Shen,

Turcan Tuna,

Marco Hutter,

Cesar Cadena,

Nanning Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Yanqing and Tuna, Turcan and Hutter, Marco and Cadena, Cesar and Zheng, Nanning},
  title     = {{ForestLPR}: {LiDAR} Place Recognition in Forests Attentioning Multiple {BEV} Density Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6659--6669}
}
BLADE: Single-view Body Mesh Estimation through Accurate Depth Estimation: Shengze Wang,

Jiefeng Li,

Tianye Li,

Ye Yuan,

Henry Fuchs,

Koki Nagano,

Shalini De Mello,

Michael Stengel; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shengze and Li, Jiefeng and Li, Tianye and Yuan, Ye and Fuchs, Henry and Nagano, Koki and De Mello, Shalini and Stengel, Michael},
  title     = {{BLADE}: Single-view Body Mesh Estimation through Accurate Depth Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21991--22000}
}
AdaMMS: Model Merging for Heterogeneous Multimodal Large Language Models with Unsupervised Coefficient Optimization: Yiyang Du,

Xiaochen Wang,

Chi Chen,

Jiabo Ye,

Yiru Wang,

Peng Li,

Ming Yan,

Ji Zhang,

Fei Huang,

Zhifang Sui,

Maosong Sun,

Yang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2025_CVPR,
  author    = {Du, Yiyang and Wang, Xiaochen and Chen, Chi and Ye, Jiabo and Wang, Yiru and Li, Peng and Yan, Ming and Zhang, Ji and Huang, Fei and Sui, Zhifang and Sun, Maosong and Liu, Yang},
  title     = {{AdaMMS}: Model Merging for Heterogeneous Multimodal Large Language Models with Unsupervised Coefficient Optimization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {9413--9422}
}
MoEE: Mixture of Emotion Experts for Audio-Driven Portrait Animation: Huaize Liu,

Wenzhang Sun,

Donglin Di,

Shibo Sun,

Jiahui Yang,

Changqing Zou,

Hujun Bao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Huaize and Sun, Wenzhang and Di, Donglin and Sun, Shibo and Yang, Jiahui and Zou, Changqing and Bao, Hujun},
  title     = {{MoEE}: Mixture of Emotion Experts for Audio-Driven Portrait Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26222--26231}
}
ReCap: Better Gaussian Relighting with Cross-Environment Captures: Jingzhi Li,

Zongwei Wu,

Eduard Zamfir,

Radu Timofte; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jingzhi and Wu, Zongwei and Zamfir, Eduard and Timofte, Radu},
  title     = {{ReCap}: Better {Gaussian} Relighting with Cross-Environment Captures},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21307--21316}
}
Split Adaptation for Pre-trained Vision Transformers: Lixu Wang,

Bingqi Shang,

Yi Li,

Payal Mohapatra,

Wei Dong,

Xiao Wang,

Qi Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Lixu and Shang, Bingqi and Li, Yi and Mohapatra, Payal and Dong, Wei and Wang, Xiao and Zhu, Qi},
  title     = {Split Adaptation for Pre-trained Vision Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20092--20102}
}
SpatialLLM: A Compound 3D-Informed Design towards Spatially-Intelligent Large Multimodal Models: Wufei Ma,

Luoxin Ye,

Celso M de Melo,

Alan Yuille,

Jieneng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Wufei and Ye, Luoxin and de Melo, Celso M and Yuille, Alan and Chen, Jieneng},
  title     = {{SpatialLLM}: A Compound {3D}-Informed Design towards Spatially-Intelligent Large Multimodal Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17249--17260}
}
SLVR: Super-Light Visual Reconstruction via Blueprint Controllable Convolutions and Exploring Feature Diversity Representation: Ning Ni,

Libao Zhang; [pdf]
[bibtex]
@InProceedings{Ni_2025_CVPR,
  author    = {Ni, Ning and Zhang, Libao},
  title     = {{SLVR}: Super-Light Visual Reconstruction via Blueprint Controllable Convolutions and Exploring Feature Diversity Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {400--410}
}
Vision-Language Embodiment for Monocular Depth Estimation: Jinchang Zhang,

Guoyu Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jinchang and Lu, Guoyu},
  title     = {Vision-Language Embodiment for Monocular Depth Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29479--29489}
}
Layered Image Vectorization via Semantic Simplification: Zhenyu Wang,

Jianxi Huang,

Zhida Sun,

Yuanhao Gong,

Daniel Cohen-Or,

Min Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhenyu and Huang, Jianxi and Sun, Zhida and Gong, Yuanhao and Cohen-Or, Daniel and Lu, Min},
  title     = {Layered Image Vectorization via Semantic Simplification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7728--7738}
}
Learning Occlusion-Robust Vision Transformers for Real-Time UAV Tracking: You Wu,

Xucheng Wang,

Xiangyang Yang,

Mengyuan Liu,

Dan Zeng,

Hengzhou Ye,

Shuiwang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, You and Wang, Xucheng and Yang, Xiangyang and Liu, Mengyuan and Zeng, Dan and Ye, Hengzhou and Li, Shuiwang},
  title     = {Learning Occlusion-Robust Vision Transformers for Real-Time {UAV} Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17103--17113}
}
Plug-and-Play Versatile Compressed Video Enhancement: Huimin Zeng,

Jiacheng Li,

Zhiwei Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_CVPR,
  author    = {Zeng, Huimin and Li, Jiacheng and Xiong, Zhiwei},
  title     = {Plug-and-Play Versatile Compressed Video Enhancement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17767--17777}
}
UltraFusion: Ultra High Dynamic Imaging using Exposure Fusion: Zixuan Chen,

Yujin Wang,

Xin Cai,

Zhiyuan You,

Zheming Lu,

Fan Zhang,

Shi Guo,

Tianfan Xue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zixuan and Wang, Yujin and Cai, Xin and You, Zhiyuan and Lu, Zheming and Zhang, Fan and Guo, Shi and Xue, Tianfan},
  title     = {{UltraFusion}: Ultra High Dynamic Imaging using Exposure Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16111--16121}
}
Hearing Anywhere in Any Environment: Xiulong Liu,

Anurag Kumar,

Paul Calamia,

Sebastia V. Amengual,

Calvin Murdock,

Ishwarya Ananthabhotla,

Philip Robinson,

Eli Shlizerman,

Vamsi Krishna Ithapu,

Ruohan Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xiulong and Kumar, Anurag and Calamia, Paul and Amengual, Sebastia V. and Murdock, Calvin and Ananthabhotla, Ishwarya and Robinson, Philip and Shlizerman, Eli and Ithapu, Vamsi Krishna and Gao, Ruohan},
  title     = {Hearing Anywhere in Any Environment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5732--5741}
}
Automated Proof of Polynomial Inequalities via Reinforcement Learning: Banglong Liu,

Niuniu Qi,

Xia Zeng,

Lydia Dehbi,

Zhengfeng Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Banglong and Qi, Niuniu and Zeng, Xia and Dehbi, Lydia and Yang, Zhengfeng},
  title     = {Automated Proof of Polynomial Inequalities via Reinforcement Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5052--5060}
}
Noise-Resistant Video Anomaly Detection via RGB Error-Guided Multiscale Predictive Coding and Dynamic Memory: Han Hu,

Wenli Du,

Peng Liao,

Bing Wang,

Siyuan Fan; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Han and Du, Wenli and Liao, Peng and Wang, Bing and Fan, Siyuan},
  title     = {Noise-Resistant Video Anomaly Detection via {RGB} Error-Guided Multiscale Predictive Coding and Dynamic Memory},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19109--19119}
}
Frequency Dynamic Convolution for Dense Image Prediction: Linwei Chen,

Lin Gu,

Liang Li,

Chenggang Yan,

Ying Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Linwei and Gu, Lin and Li, Liang and Yan, Chenggang and Fu, Ying},
  title     = {Frequency Dynamic Convolution for Dense Image Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30178--30188}
}
IDEA: Inverted Text with Cooperative Deformable Aggregation for Multi-modal Object Re-Identification: Yuhao Wang,

Yongfeng Lv,

Pingping Zhang,

Huchuan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuhao and Lv, Yongfeng and Zhang, Pingping and Lu, Huchuan},
  title     = {{IDEA}: Inverted Text with Cooperative Deformable Aggregation for Multi-modal Object Re-Identification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29701--29710}
}
SAM-I2V: Upgrading SAM to Support Promptable Video Segmentation with Less than 0.2% Training Cost: Haiyang Mei,

Pengyu Zhang,

Mike Zheng Shou; [pdf] [supp]
[bibtex]
@InProceedings{Mei_2025_CVPR,
  author    = {Mei, Haiyang and Zhang, Pengyu and Shou, Mike Zheng},
  title     = {{SAM-I2V}: Upgrading {SAM} to Support Promptable Video Segmentation with Less than 0.2\% Training Cost},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {3417--3426}
}
GroupMamba: Efficient Group-Based Visual State Space Model: Abdelrahman Shaker,

Syed Talal Wasim,

Salman Khan,

Juergen Gall,

Fahad Shahbaz Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shaker_2025_CVPR,
  author    = {Shaker, Abdelrahman and Wasim, Syed Talal and Khan, Salman and Gall, Juergen and Khan, Fahad Shahbaz},
  title     = {{GroupMamba}: Efficient Group-Based Visual State Space Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {14912--14922}
}
How Do I Do That? Synthesizing 3D Hand Motion and Contacts for Everyday Interactions: Aditya Prakash,

Benjamin Lundell,

Dmitry Andreychuk,

David Forsyth,

Saurabh Gupta,

Harpreet Sawhney; [pdf] [arXiv]
[bibtex]
@InProceedings{Prakash_2025_CVPR,
  author    = {Prakash, Aditya and Lundell, Benjamin and Andreychuk, Dmitry and Forsyth, David and Gupta, Saurabh and Sawhney, Harpreet},
  title     = {How Do {I} Do That? {Synthesizing} {3D} Hand Motion and Contacts for Everyday Interactions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {7026--7036}
}
Escaping Plato's Cave: Towards the Alignment of 3D and Text Latent Spaces: Souhail Hadgi,

Luca Moschella,

Andrea Santilli,

Diego Gomez,

Qixing Huang,

Emanuele Rodolà,

Simone Melzi,

Maks Ovsjanikov; [pdf] [supp]
[bibtex]
@InProceedings{Hadgi_2025_CVPR,
  author    = {Hadgi, Souhail and Moschella, Luca and Santilli, Andrea and Gomez, Diego and Huang, Qixing and Rodol{\`a}, Emanuele and Melzi, Simone and Ovsjanikov, Maks},
  title     = {Escaping {Plato's} Cave: Towards the Alignment of {3D} and Text Latent Spaces},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19825--19835}
}
Consistency Posterior Sampling for Diverse Image Synthesis: Vishal Purohit,

Matthew Repasky,

Jianfeng Lu,

Qiang Qiu,

Yao Xie,

Xiuyuan Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Purohit_2025_CVPR,
  author    = {Purohit, Vishal and Repasky, Matthew and Lu, Jianfeng and Qiu, Qiang and Xie, Yao and Cheng, Xiuyuan},
  title     = {Consistency Posterior Sampling for Diverse Image Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28327--28336}
}
IMFine: 3D Inpainting via Geometry-guided Multi-view Refinement: Zhihao Shi,

Dong Huo,

Yuhongze Zhou,

Yan Min,

Juwei Lu,

Xinxin Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Zhihao and Huo, Dong and Zhou, Yuhongze and Min, Yan and Lu, Juwei and Zuo, Xinxin},
  title     = {{IMFine}: {3D} Inpainting via Geometry-guided Multi-view Refinement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26694--26703}
}
ActiveGAMER: Active GAussian Mapping through Efficient Rendering: Liyan Chen,

Huangying Zhan,

Kevin Chen,

Xiangyu Xu,

Qingan Yan,

Changjiang Cai,

Yi Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Liyan and Zhan, Huangying and Chen, Kevin and Xu, Xiangyu and Yan, Qingan and Cai, Changjiang and Xu, Yi},
  title     = {{ActiveGAMER}: Active {GAussian} Mapping through Efficient Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16486--16497}
}
DeepCompress-ViT: Rethinking Model Compression to Enhance Efficiency of Vision Transformers at the Edge: Sabbir Ahmed,

Abdullah Al Arafat,

Deniz Najafi,

Akhlak Mahmood,

Mamshad Nayeem Rizve,

Mohaiminul Al Nahian,

Ranyang Zhou,

Shaahin Angizi,

Adnan Siraj Rakin; [pdf]
[bibtex]
@InProceedings{Ahmed_2025_CVPR,
  author    = {Ahmed, Sabbir and Al Arafat, Abdullah and Najafi, Deniz and Mahmood, Akhlak and Rizve, Mamshad Nayeem and Al Nahian, Mohaiminul and Zhou, Ranyang and Angizi, Shaahin and Rakin, Adnan Siraj},
  title     = {{DeepCompress-ViT}: Rethinking Model Compression to Enhance Efficiency of Vision Transformers at the Edge},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30147--30156}
}
EvOcc: Accurate Semantic Occupancy for Automated Driving Using Evidence Theory: Jonas Kälble,

Sascha Wirges,

Maxim Tatarchenko,

Eddy Ilg; [pdf] [supp]
[bibtex]
@InProceedings{Kalble_2025_CVPR,
  author    = {K{\"a}lble, Jonas and Wirges, Sascha and Tatarchenko, Maxim and Ilg, Eddy},
  title     = {{EvOcc}: Accurate Semantic Occupancy for Automated Driving Using Evidence Theory},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27467--27476}
}
Positive2Negative: Breaking the Information-Lossy Barrier in Self-Supervised Single Image Denoising: Tong Li,

Lizhi Wang,

Zhiyuan Xu,

Lin Zhu,

Wanxuan Lu,

Hua Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Tong and Wang, Lizhi and Xu, Zhiyuan and Zhu, Lin and Lu, Wanxuan and Huang, Hua},
  title     = {{Positive2Negative}: Breaking the Information-Lossy Barrier in Self-Supervised Single Image Denoising},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17924--17934}
}
Towards Continual Universal Segmentation: Zihan Lin,

Zilei Wang,

Xu Wang; [pdf]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Zihan and Wang, Zilei and Wang, Xu},
  title     = {Towards Continual Universal Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29417--29427}
}
PGC: Physics-Based Gaussian Cloth from a Single Pose: Michelle Guo,

Matt Jen-Yuan Chiang,

Igor Santesteban,

Nikolaos Sarafianos,

Hsiao-yu Chen,

Oshri Halimi,

Aljaž Božič,

Shunsuke Saito,

Jiajun Wu,

C. Karen Liu,

Tuur Stuyck,

Egor Larionov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Michelle and Chiang, Matt Jen-Yuan and Santesteban, Igor and Sarafianos, Nikolaos and Chen, Hsiao-yu and Halimi, Oshri and Bo{\v{z}}i{\v{c}}, Alja{\v{z}} and Saito, Shunsuke and Wu, Jiajun and Liu, C. Karen and Stuyck, Tuur and Larionov, Egor},
  title     = {{PGC}: Physics-Based {Gaussian} Cloth from a Single Pose},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21215--21225}
}
Joint Vision-Language Social Bias Removal for CLIP: Haoyu Zhang,

Yangyang Guo,

Mohan Kankanhalli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Haoyu and Guo, Yangyang and Kankanhalli, Mohan},
  title     = {Joint Vision-Language Social Bias Removal for {CLIP}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {4246--4255}
}
MV-DUSt3R+: Single-Stage Scene Reconstruction from Sparse Views In 2 Seconds: Zhenggang Tang,

Yuchen Fan,

Dilin Wang,

Hongyu Xu,

Rakesh Ranjan,

Alexander Schwing,

Zhicheng Yan; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Zhenggang and Fan, Yuchen and Wang, Dilin and Xu, Hongyu and Ranjan, Rakesh and Schwing, Alexander and Yan, Zhicheng},
  title     = {{MV-DUSt3R+}: Single-Stage Scene Reconstruction from Sparse Views In 2 Seconds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {5283--5293}
}
Explicit Depth-Aware Blurry Video Frame Interpolation Guided by Differential Curves: Zaoming Yan,

Pengcheng Lei,

Tingting Wang,

Faming Fang,

Junkang Zhang,

Yaomin Huang,

Haichuan Song; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Zaoming and Lei, Pengcheng and Wang, Tingting and Fang, Faming and Zhang, Junkang and Huang, Yaomin and Song, Haichuan},
  title     = {Explicit Depth-Aware Blurry Video Frame Interpolation Guided by Differential Curves},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {1994--2004}
}
OFER: Occluded Face Expression Reconstruction: Pratheba Selvaraju,

Victoria Fernandez Abrevaya,

Timo Bolkart,

Rick Akkerman,

Tianyu Ding,

Faezeh Amjadi,

Ilya Zharkov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Selvaraju_2025_CVPR,
  author    = {Selvaraju, Pratheba and Abrevaya, Victoria Fernandez and Bolkart, Timo and Akkerman, Rick and Ding, Tianyu and Amjadi, Faezeh and Zharkov, Ilya},
  title     = {{OFER}: Occluded Face Expression Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26985--26995}
}
SeaLion: Semantic Part-Aware Latent Point Diffusion Models for 3D Generation: Dekai Zhu,

Yan Di,

Stefan Gavranovic,

Slobodan Ilic; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Dekai and Di, Yan and Gavranovic, Stefan and Ilic, Slobodan},
  title     = {{SeaLion}: Semantic Part-Aware Latent Point Diffusion Models for {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {11789--11798}
}
MonSter: Marry Monodepth to Stereo Unleashes Power: Junda Cheng,

Longliang Liu,

Gangwei Xu,

Xianqi Wang,

Zhaoxing Zhang,

Yong Deng,

Jinliang Zang,

Yurui Chen,

Zhipeng Cai,

Xin Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Junda and Liu, Longliang and Xu, Gangwei and Wang, Xianqi and Zhang, Zhaoxing and Deng, Yong and Zang, Jinliang and Chen, Yurui and Cai, Zhipeng and Yang, Xin},
  title     = {{MonSter}: Marry Monodepth to Stereo Unleashes Power},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {6273--6282}
}
Toward Real-world BEV Perception: Depth Uncertainty Estimation via Gaussian Splatting: Shu-Wei Lu,

Yi-Hsuan Tsai,

Yi-Ting Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Shu-Wei and Tsai, Yi-Hsuan and Chen, Yi-Ting},
  title     = {Toward Real-world {BEV} Perception: Depth Uncertainty Estimation via {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {17124--17133}
}
Efficient Fine-Tuning and Concept Suppression for Pruned Diffusion Models: Reza Shirkavand,

Peiran Yu,

Shangqian Gao,

Gowthami Somepalli,

Tom Goldstein,

Heng Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shirkavand_2025_CVPR,
  author    = {Shirkavand, Reza and Yu, Peiran and Gao, Shangqian and Somepalli, Gowthami and Goldstein, Tom and Huang, Heng},
  title     = {Efficient Fine-Tuning and Concept Suppression for Pruned Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18619--18629}
}
Cubify Anything: Scaling Indoor 3D Object Detection: Justin Lazarow,

David Griffiths,

Gefen Kohavi,

Francisco Crespo,

Afshin Dehghan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lazarow_2025_CVPR,
    author    = {Lazarow, Justin and Griffiths, David and Kohavi, Gefen and Crespo, Francisco and Dehghan, Afshin},
    title     = {Cubify Anything: Scaling Indoor {3D} Object Detection},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {22225--22233},
}
WildGS-SLAM: Monocular Gaussian Splatting SLAM in Dynamic Environments: Jianhao Zheng,

Zihan Zhu,

Valentin Bieri,

Marc Pollefeys,

Songyou Peng,

Iro Armeni; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
    author    = {Zheng, Jianhao and Zhu, Zihan and Bieri, Valentin and Pollefeys, Marc and Peng, Songyou and Armeni, Iro},
    title     = {{WildGS-SLAM}: Monocular {Gaussian} Splatting {SLAM} in Dynamic Environments},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {11461--11471},
}
A Tale of Two Classes: Adapting Supervised Contrastive Learning to Binary Imbalanced Datasets: David Mildenberger,

Paul Hager,

Daniel Rueckert,

Martin J. Menten; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mildenberger_2025_CVPR,
    author    = {Mildenberger, David and Hager, Paul and Rueckert, Daniel and Menten, Martin J.},
    title     = {A Tale of Two Classes: Adapting Supervised Contrastive Learning to Binary Imbalanced Datasets},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {10305--10314},
}
DEAL: Data-Efficient Adversarial Learning for High-Quality Infrared Imaging: Zhu Liu,

Zijun Wang,

Jinyuan Liu,

Fanqi Meng,

Long Ma,

Risheng Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Zhu and Wang, Zijun and Liu, Jinyuan and Meng, Fanqi and Ma, Long and Liu, Risheng},
    title     = {{DEAL}: Data-Efficient Adversarial Learning for High-Quality Infrared Imaging},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {28198--28207},
}
RePerformer: Immersive Human-centric Volumetric Videos from Playback to Photoreal Reperformance: Yuheng Jiang,

Zhehao Shen,

Chengcheng Guo,

Yu Hong,

Zhuo Su,

Yingliang Zhang,

Marc Habermann,

Lan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
    author    = {Jiang, Yuheng and Shen, Zhehao and Guo, Chengcheng and Hong, Yu and Su, Zhuo and Zhang, Yingliang and Habermann, Marc and Xu, Lan},
    title     = {{RePerformer}: Immersive Human-centric Volumetric Videos from Playback to Photoreal Reperformance},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {11349--11360},
}
CheXWorld: Exploring Image World Modeling for Radiograph Representation Learning: Yang Yue,

Yulin Wang,

Chenxin Tao,

Pan Liu,

Shiji Song,

Gao Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yue_2025_CVPR,
    author    = {Yue, Yang and Wang, Yulin and Tao, Chenxin and Liu, Pan and Song, Shiji and Huang, Gao},
    title     = {{CheXWorld}: Exploring Image World Modeling for Radiograph Representation Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {20778--20788},
}
Towards Long-Horizon Vision-Language Navigation: Platform, Benchmark and Method: Xinshuai Song,

Weixing Chen,

Yang Liu,

Weikai Chen,

Guanbin Li,

Liang Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2025_CVPR,
    author    = {Song, Xinshuai and Chen, Weixing and Liu, Yang and Chen, Weikai and Li, Guanbin and Lin, Liang},
    title     = {Towards Long-Horizon Vision-Language Navigation: Platform, Benchmark and Method},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {12078--12088},
}
Learning Class Prototypes for Unified Sparse-Supervised 3D Object Detection: Yun Zhu,

Le Hui,

Hang Yang,

Jianjun Qian,

Jin Xie,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
    author    = {Zhu, Yun and Hui, Le and Yang, Hang and Qian, Jianjun and Xie, Jin and Yang, Jian},
    title     = {Learning Class Prototypes for Unified Sparse-Supervised {3D} Object Detection},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {9911--9920},
}
BimArt: A Unified Approach for the Synthesis of 3D Bimanual Interaction with Articulated Objects: Wanyue Zhang,

Rishabh Dabral,

Vladislav Golyanik,

Vasileios Choutas,

Eduardo Alvarado,

Thabo Beeler,

Marc Habermann,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Wanyue and Dabral, Rishabh and Golyanik, Vladislav and Choutas, Vasileios and Alvarado, Eduardo and Beeler, Thabo and Habermann, Marc and Theobalt, Christian},
    title     = {{BimArt}: A Unified Approach for the Synthesis of {3D} Bimanual Interaction with Articulated Objects},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {27694--27705},
}
Open-World Amodal Appearance Completion: Jiayang Ao,

Yanbei Jiang,

Qiuhong Ke,

Krista A. Ehinger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ao_2025_CVPR,
    author    = {Ao, Jiayang and Jiang, Yanbei and Ke, Qiuhong and Ehinger, Krista A.},
    title     = {Open-World Amodal Appearance Completion},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6490--6499},
}
AIGV-Assessor: Benchmarking and Evaluating the Perceptual Quality of Text-to-Video Generation with LMM: Jiarui Wang,

Huiyu Duan,

Guangtao Zhai,

Juntong Wang,

Xiongkuo Min; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Jiarui and Duan, Huiyu and Zhai, Guangtao and Wang, Juntong and Min, Xiongkuo},
    title     = {{AIGV-Assessor}: Benchmarking and Evaluating the Perceptual Quality of Text-to-Video Generation with {LMM}},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {18869--18880},
}
CoSpace: Benchmarking Continuous Space Perception Ability for Vision-Language Models: Yiqi Zhu,

Ziyue Wang,

Can Zhang,

Peng Li,

Yang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
    author    = {Zhu, Yiqi and Wang, Ziyue and Zhang, Can and Li, Peng and Liu, Yang},
    title     = {{CoSpace}: Benchmarking Continuous Space Perception Ability for Vision-Language Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {29569--29579},
}
Autoregressive Distillation of Diffusion Transformers: Yeongmin Kim,

Sotiris Anagnostidis,

Yuming Du,

Edgar Schönfeld,

Jonas Kohler,

Markos Georgopoulos,

Albert Pumarola,

Ali Thabet,

Artsiom Sanakoyeu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
    author    = {Kim, Yeongmin and Anagnostidis, Sotiris and Du, Yuming and Sch{\"o}nfeld, Edgar and Kohler, Jonas and Georgopoulos, Markos and Pumarola, Albert and Thabet, Ali and Sanakoyeu, Artsiom},
    title     = {Autoregressive Distillation of Diffusion Transformers},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {15745--15756},
}
RivuletMLP: An MLP-based Architecture for Efficient Compressed Video Quality Enhancement: Gang He,

Weiran Wang,

Guancheng Quan,

Shihao Wang,

Dajiang Zhou,

Yunsong Li; [pdf]
[bibtex]
@InProceedings{He_2025_CVPR,
    author    = {He, Gang and Wang, Weiran and Quan, Guancheng and Wang, Shihao and Zhou, Dajiang and Li, Yunsong},
    title     = {{RivuletMLP}: An {MLP}-based Architecture for Efficient Compressed Video Quality Enhancement},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {7342--7352},
}
OmniManip: Towards General Robotic Manipulation via Object-Centric Interaction Primitives as Spatial Constraints: Mingjie Pan,

Jiyao Zhang,

Tianshu Wu,

Yinghao Zhao,

Wenlong Gao,

Hao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2025_CVPR,
    author    = {Pan, Mingjie and Zhang, Jiyao and Wu, Tianshu and Zhao, Yinghao and Gao, Wenlong and Dong, Hao},
    title     = {{OmniManip}: Towards General Robotic Manipulation via Object-Centric Interaction Primitives as Spatial Constraints},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {17359--17369},
}
FreeTimeGS: Free Gaussian Primitives at Anytime Anywhere for Dynamic Scene Reconstruction: Yifan Wang,

Peishan Yang,

Zhen Xu,

Jiaming Sun,

Zhanhua Zhang,

Yong Chen,

Hujun Bao,

Sida Peng,

Xiaowei Zhou; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Yifan and Yang, Peishan and Xu, Zhen and Sun, Jiaming and Zhang, Zhanhua and Chen, Yong and Bao, Hujun and Peng, Sida and Zhou, Xiaowei},
    title     = {{FreeTimeGS}: Free {Gaussian} Primitives at Anytime Anywhere for Dynamic Scene Reconstruction},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {21750--21760},
}
Show and Tell: Visually Explainable Deep Neural Nets via Spatially-Aware Concept Bottleneck Models: Itay Benou,

Tammy Riklin Raviv; [pdf] [supp]
[bibtex]
@InProceedings{Benou_2025_CVPR,
    author    = {Benou, Itay and Riklin Raviv, Tammy},
    title     = {Show and Tell: Visually Explainable Deep Neural Nets via Spatially-Aware Concept Bottleneck Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {30063--30072},
}
DiscoVLA: Discrepancy Reduction in Vision, Language, and Alignment for Parameter-Efficient Video-Text Retrieval: Leqi Shen,

Guoqiang Gong,

Tianxiang Hao,

Tao He,

Yifeng Zhang,

Pengzhang Liu,

Sicheng Zhao,

Jungong Han,

Guiguang Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2025_CVPR,
    author    = {Shen, Leqi and Gong, Guoqiang and Hao, Tianxiang and He, Tao and Zhang, Yifeng and Liu, Pengzhang and Zhao, Sicheng and Han, Jungong and Ding, Guiguang},
    title     = {{DiscoVLA}: Discrepancy Reduction in Vision, Language, and Alignment for Parameter-Efficient Video-Text Retrieval},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {19702--19712},
}
Reanimating Images using Neural Representations of Dynamic Stimuli: Jacob Yeung,

Andrew F. Luo,

Gabriel Sarch,

Margaret M. Henderson,

Deva Ramanan,

Michael J. Tarr; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yeung_2025_CVPR,
    author    = {Yeung, Jacob and Luo, Andrew F. and Sarch, Gabriel and Henderson, Margaret M. and Ramanan, Deva and Tarr, Michael J.},
    title     = {Reanimating Images using Neural Representations of Dynamic Stimuli},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {5331--5343},
}
Visual-Instructed Degradation Diffusion for All-in-One Image Restoration: Wenyang Luo,

Haina Qin,

Zewen Chen,

Libin Wang,

Dandan Zheng,

Yuming Li,

Yufan Liu,

Bing Li,

Weiming Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_CVPR,
    author    = {Luo, Wenyang and Qin, Haina and Chen, Zewen and Wang, Libin and Zheng, Dandan and Li, Yuming and Liu, Yufan and Li, Bing and Hu, Weiming},
    title     = {Visual-Instructed Degradation Diffusion for All-in-One Image Restoration},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {12764--12777},
}
Insightful Instance Features for 3D Instance Segmentation: Wonseok Roh,

Hwanhee Jung,

Giljoo Nam,

Dong In Lee,

Hyeongcheol Park,

Sang Ho Yoon,

Jungseock Joo,

Sangpil Kim; [pdf] [supp]
[bibtex]
@InProceedings{Roh_2025_CVPR,
    author    = {Roh, Wonseok and Jung, Hwanhee and Nam, Giljoo and Lee, Dong In and Park, Hyeongcheol and Yoon, Sang Ho and Joo, Jungseock and Kim, Sangpil},
    title     = {Insightful Instance Features for {3D} Instance Segmentation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {14057--14067},
}
Learning 4D Panoptic Scene Graph Generation from Rich 2D Visual Scene: Shengqiong Wu,

Hao Fei,

Jingkang Yang,

Xiangtai Li,

Juncheng Li,

Hanwang Zhang,

Tat-seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
    author    = {Wu, Shengqiong and Fei, Hao and Yang, Jingkang and Li, Xiangtai and Li, Juncheng and Zhang, Hanwang and Chua, Tat-seng},
    title     = {Learning {4D} Panoptic Scene Graph Generation from Rich {2D} Visual Scene},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {24539--24549},
}
Knowledge Bridger: Towards Training-Free Missing Modality Completion: Guanzhou Ke,

Shengfeng He,

Xiaoli Wang,

Bo Wang,

Guoqing Chao,

Yuanyang Zhang,

Yi Xie,

Hexing Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2025_CVPR,
    author    = {Ke, Guanzhou and He, Shengfeng and Wang, Xiaoli and Wang, Bo and Chao, Guoqing and Zhang, Yuanyang and Xie, Yi and Su, Hexing},
    title     = {Knowledge Bridger: Towards Training-Free Missing Modality Completion},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {25864--25873},
}
EmoDubber: Towards High Quality and Emotion Controllable Movie Dubbing: Gaoxiang Cong,

Jiadong Pan,

Liang Li,

Yuankai Qi,

Yuxin Peng,

Anton van den Hengel,

Jian Yang,

Qingming Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Cong_2025_CVPR,
    author    = {Cong, Gaoxiang and Pan, Jiadong and Li, Liang and Qi, Yuankai and Peng, Yuxin and van den Hengel, Anton and Yang, Jian and Huang, Qingming},
    title     = {{EmoDubber}: Towards High Quality and Emotion Controllable Movie Dubbing},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {15863--15873},
}
DepthCrafter: Generating Consistent Long Depth Sequences for Open-world Videos: Wenbo Hu,

Xiangjun Gao,

Xiaoyu Li,

Sijie Zhao,

Xiaodong Cun,

Yong Zhang,

Long Quan,

Ying Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR,
    author    = {Hu, Wenbo and Gao, Xiangjun and Li, Xiaoyu and Zhao, Sijie and Cun, Xiaodong and Zhang, Yong and Quan, Long and Shan, Ying},
    title     = {{DepthCrafter}: Generating Consistent Long Depth Sequences for Open-world Videos},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2005--2015},
}
TexGarment: Consistent Garment UV Texture Generation via Efficient 3D Structure-Guided Diffusion Transformer: Jialun Liu,

Jinbo Wu,

Xiaobo Gao,

Jiakui Hu,

Bojun Xiong,

Xing Liu,

Chen Zhao,

Hongbin Pei,

Haocheng Feng,

Yingying Li,

Errui Ding,

Jingdong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Jialun and Wu, Jinbo and Gao, Xiaobo and Hu, Jiakui and Xiong, Bojun and Liu, Xing and Zhao, Chen and Pei, Hongbin and Feng, Haocheng and Li, Yingying and Ding, Errui and Wang, Jingdong},
    title     = {{TexGarment}: Consistent Garment {UV} Texture Generation via Efficient {3D} Structure-Guided Diffusion Transformer},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {26566--26575},
}
A Hubness Perspective on Representation Learning for Graph-Based Multi-View Clustering: Zheming Xu,

He Liu,

Congyan Lang,

Tao Wang,

Yidong Li,

Michael C. Kampffmeyer; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
    author    = {Xu, Zheming and Liu, He and Lang, Congyan and Wang, Tao and Li, Yidong and Kampffmeyer, Michael C.},
    title     = {A Hubness Perspective on Representation Learning for Graph-Based Multi-View Clustering},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {15528--15537},
}
Spatial-Temporal Graph Diffusion Policy with Kinematic Modeling for Bimanual Robotic Manipulation: Qi Lv,

Hao Li,

Xiang Deng,

Rui Shao,

Yinchuan Li,

Jianye Hao,

Longxiang Gao,

Michael Yu Wang,

Liqiang Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2025_CVPR,
    author    = {Lv, Qi and Li, Hao and Deng, Xiang and Shao, Rui and Li, Yinchuan and Hao, Jianye and Gao, Longxiang and Wang, Michael Yu and Nie, Liqiang},
    title     = {Spatial-Temporal Graph Diffusion Policy with Kinematic Modeling for Bimanual Robotic Manipulation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {17394--17404},
}
Semi-Supervised State-Space Model with Dynamic Stacking Filter for Real-World Video Deraining: Shangquan Sun,

Wenqi Ren,

Juxiang Zhou,

Shu Wang,

Jianhou Gan,

Xiaochun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
    author    = {Sun, Shangquan and Ren, Wenqi and Zhou, Juxiang and Wang, Shu and Gan, Jianhou and Cao, Xiaochun},
    title     = {Semi-Supervised State-Space Model with Dynamic Stacking Filter for Real-World Video Deraining},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {26114--26124},
}
Rethinking Vision-Language Model in Face Forensics: Multi-Modal Interpretable Forged Face Detector: Xiao Guo,

Xiufeng Song,

Yue Zhang,

Xiaohong Liu,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
    author    = {Guo, Xiao and Song, Xiufeng and Zhang, Yue and Liu, Xiaohong and Liu, Xiaoming},
    title     = {Rethinking Vision-Language Model in Face Forensics: Multi-Modal Interpretable Forged Face Detector},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {105--116},
}
TIDE: Training Locally Interpretable Domain Generalization Models Enables Test-time Correction: Aishwarya Agarwal,

Srikrishna Karanam,

Vineet Gandhi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Agarwal_2025_CVPR,
    author    = {Agarwal, Aishwarya and Karanam, Srikrishna and Gandhi, Vineet},
    title     = {{TIDE}: Training Locally Interpretable Domain Generalization Models Enables Test-time Correction},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {30210--30220},
}
VSNet: Focusing on the Linguistic Characteristics of Sign Language: Yuhao Li,

Xinyue Chen,

Hongkai Li,

Xiaorong Pu,

Peng Jin,

Yazhou Ren; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Yuhao and Chen, Xinyue and Li, Hongkai and Pu, Xiaorong and Jin, Peng and Ren, Yazhou},
    title     = {{VSNet}: Focusing on the Linguistic Characteristics of Sign Language},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {24320--24330},
}
Active Hyperspectral Imaging Using an Event Camera: Bohan Yu,

Jinxiu Liang,

Zhuofeng Wang,

Bin Fan,

Art Subpa-asa,

Boxin Shi,

Imari Sato; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
    author    = {Yu, Bohan and Liang, Jinxiu and Wang, Zhuofeng and Fan, Bin and Subpa-asa, Art and Shi, Boxin and Sato, Imari},
    title     = {Active Hyperspectral Imaging Using an Event Camera},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {929--939},
}
Bridging the Gap between Gaussian Diffusion Models and Universal Quantization for Image Compression: Lucas Relic,

Roberto Azevedo,

Yang Zhang,

Markus Gross,

Christopher Schroers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Relic_2025_CVPR,
    author    = {Relic, Lucas and Azevedo, Roberto and Zhang, Yang and Gross, Markus and Schroers, Christopher},
    title     = {Bridging the Gap between {Gaussian} Diffusion Models and Universal Quantization for Image Compression},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {2449--2458},
}
ZeroVO: Visual Odometry with Minimal Assumptions: Lei Lai,

Zekai Yin,

Eshed Ohn-Bar; [pdf] [arXiv]
[bibtex]
@InProceedings{Lai_2025_CVPR,
    author    = {Lai, Lei and Yin, Zekai and Ohn-Bar, Eshed},
    title     = {{ZeroVO}: Visual Odometry with Minimal Assumptions},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {17092--17102},
}
VideoRefer Suite: Advancing Spatial-Temporal Object Understanding with Video LLM: Yuqian Yuan,

Hang Zhang,

Wentong Li,

Zesen Cheng,

Boqiang Zhang,

Long Li,

Xin Li,

Deli Zhao,

Wenqiao Zhang,

Yueting Zhuang,

Jianke Zhu,

Lidong Bing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_CVPR,
    author    = {Yuan, Yuqian and Zhang, Hang and Li, Wentong and Cheng, Zesen and Zhang, Boqiang and Li, Long and Li, Xin and Zhao, Deli and Zhang, Wenqiao and Zhuang, Yueting and Zhu, Jianke and Bing, Lidong},
    title     = {{VideoRefer} Suite: Advancing Spatial-Temporal Object Understanding with Video {LLM}},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {18970--18980},
}
Learning to Sample Effective and Diverse Prompts for Text-to-Image Generation: Taeyoung Yun,

Dinghuai Zhang,

Jinkyoo Park,

Ling Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yun_2025_CVPR,
    author    = {Yun, Taeyoung and Zhang, Dinghuai and Park, Jinkyoo and Pan, Ling},
    title     = {Learning to Sample Effective and Diverse Prompts for Text-to-Image Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {23625--23635},
}
Multi-modal Medical Diagnosis via Large-small Model Collaboration: Wanyi Chen,

Zihua Zhao,

Jiangchao Yao,

Ya Zhang,

Jiajun Bu,

Haishuai Wang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Wanyi and Zhao, Zihua and Yao, Jiangchao and Zhang, Ya and Bu, Jiajun and Wang, Haishuai},
    title     = {Multi-modal Medical Diagnosis via Large-small Model Collaboration},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {30763--30773},
}
SAMBLE: Shape-Specific Point Cloud Sampling for an Optimal Trade-Off Between Local Detail and Global Uniformity: Chengzhi Wu,

Yuxin Wan,

Hao Fu,

Julius Pfrommer,

Zeyun Zhong,

Junwei Zheng,

Jiaming Zhang,

Jürgen Beyerer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
    author    = {Wu, Chengzhi and Wan, Yuxin and Fu, Hao and Pfrommer, Julius and Zhong, Zeyun and Zheng, Junwei and Zhang, Jiaming and Beyerer, J{\"u}rgen},
    title     = {{SAMBLE}: Shape-Specific Point Cloud Sampling for an Optimal Trade-Off Between Local Detail and Global Uniformity},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1342--1352},
}
Image Referenced Sketch Colorization Based on Animation Creation Workflow: Dingkun Yan,

Xinrui Wang,

Zhuoru Li,

Suguru Saito,

Yusuke Iwasawa,

Yutaka Matsuo,

Jiaxian Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
    author    = {Yan, Dingkun and Wang, Xinrui and Li, Zhuoru and Saito, Suguru and Iwasawa, Yusuke and Matsuo, Yutaka and Guo, Jiaxian},
    title     = {Image Referenced Sketch Colorization Based on Animation Creation Workflow},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {23391--23400},
}
HoVLE: Unleashing the Power of Monolithic Vision-Language Models with Holistic Vision-Language Embedding: Chenxin Tao,

Shiqian Su,

Xizhou Zhu,

Chenyu Zhang,

Zhe Chen,

Jiawen Liu,

Wenhai Wang,

Lewei Lu,

Gao Huang,

Yu Qiao,

Jifeng Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tao_2025_CVPR,
    author    = {Tao, Chenxin and Su, Shiqian and Zhu, Xizhou and Zhang, Chenyu and Chen, Zhe and Liu, Jiawen and Wang, Wenhai and Lu, Lewei and Huang, Gao and Qiao, Yu and Dai, Jifeng},
    title     = {{HoVLE}: Unleashing the Power of Monolithic Vision-Language Models with Holistic Vision-Language Embedding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {14559--14569},
}
Gen3DEval: Using vLLMs for Automatic Evaluation of Generated 3D Objects: Shalini Maiti,

Lourdes Agapito,

Filippos Kokkinos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Maiti_2025_CVPR,
    author    = {Maiti, Shalini and Agapito, Lourdes and Kokkinos, Filippos},
    title     = {{Gen3DEval}: Using {vLLMs} for Automatic Evaluation of Generated {3D} Objects},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {18552--18562},
}
SkySense-O: Towards Open-World Remote Sensing Interpretation with Vision-Centric Visual-Language Modeling: Qi Zhu,

Jiangwei Lao,

Deyi Ji,

Junwei Luo,

Kang Wu,

Yingying Zhang,

Lixiang Ru,

Jian Wang,

Jingdong Chen,

Ming Yang,

Dong Liu,

Feng Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
    author    = {Zhu, Qi and Lao, Jiangwei and Ji, Deyi and Luo, Junwei and Wu, Kang and Zhang, Yingying and Ru, Lixiang and Wang, Jian and Chen, Jingdong and Yang, Ming and Liu, Dong and Zhao, Feng},
    title     = {{SkySense-O}: Towards Open-World Remote Sensing Interpretation with Vision-Centric Visual-Language Modeling},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {14733--14744},
}
AdaDARE-gamma: Balancing Stability and Plasticity in Multi-modal LLMs through Efficient Adaptation: Jingyi Xie,

Jintao Yang,

Zhunchen Luo,

Yunbo Cao,

Qiang Gao,

Mengyuan Zhang,

Wenpeng Hu; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_CVPR,
    author    = {Xie, Jingyi and Yang, Jintao and Luo, Zhunchen and Cao, Yunbo and Gao, Qiang and Zhang, Mengyuan and Hu, Wenpeng},
    title     = {{AdaDARE-gamma}: Balancing Stability and Plasticity in Multi-modal {LLMs} through Efficient Adaptation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {19758--19768},
}
Driving by the Rules: A Benchmark for Integrating Traffic Sign Regulations into Vectorized HD Map: Xinyuan Chang,

Maixuan Xue,

Xinran Liu,

Zheng Pan,

Xing Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chang_2025_CVPR,
    author    = {Chang, Xinyuan and Xue, Maixuan and Liu, Xinran and Pan, Zheng and Wei, Xing},
    title     = {Driving by the Rules: A Benchmark for Integrating Traffic Sign Regulations into Vectorized {HD} Map},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {6823--6833},
}
LeviTor: 3D Trajectory Oriented Image-to-Video Synthesis: Hanlin Wang,

Hao Ouyang,

Qiuyu Wang,

Wen Wang,

Ka Leong Cheng,

Qifeng Chen,

Yujun Shen,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Hanlin and Ouyang, Hao and Wang, Qiuyu and Wang, Wen and Cheng, Ka Leong and Chen, Qifeng and Shen, Yujun and Wang, Limin},
    title     = {{LeviTor}: {3D} Trajectory Oriented Image-to-Video Synthesis},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {12490--12500},
}
GaPT-DAR: Category-level Garments Pose Tracking via Integrated 2D Deformation and 3D Reconstruction: Li Zhang,

Mingliang Xu,

Jianan Wang,

Qiaojun Yu,

Lixin Yang,

Yonglu Li,

Cewu Lu,

Rujing Wang,

Liu Liu; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Li and Xu, Mingliang and Wang, Jianan and Yu, Qiaojun and Yang, Lixin and Li, Yonglu and Lu, Cewu and Wang, Rujing and Liu, Liu},
    title     = {{GaPT-DAR}: Category-level Garments Pose Tracking via Integrated {2D} Deformation and {3D} Reconstruction},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {22638--22647},
}
ProbPose: A Probabilistic Approach to 2D Human Pose Estimation: Miroslav Purkrabek,

Jiri Matas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Purkrabek_2025_CVPR,
    author    = {Purkrabek, Miroslav and Matas, Jiri},
    title     = {{ProbPose}: A Probabilistic Approach to {2D} Human Pose Estimation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {27124--27133},
}
SapiensID: Foundation for Human Recognition: Minchul Kim,

Dingqiang Ye,

Yiyang Su,

Feng Liu,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
    author    = {Kim, Minchul and Ye, Dingqiang and Su, Yiyang and Liu, Feng and Liu, Xiaoming},
    title     = {{SapiensID}: Foundation for Human Recognition},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {13937--13947},
}
MIDI: Multi-Instance Diffusion for Single Image to 3D Scene Generation: Zehuan Huang,

Yuan-Chen Guo,

Xingqiao An,

Yunhan Yang,

Yangguang Li,

Zi-Xin Zou,

Ding Liang,

Xihui Liu,

Yan-Pei Cao,

Lu Sheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
    author    = {Huang, Zehuan and Guo, Yuan-Chen and An, Xingqiao and Yang, Yunhan and Li, Yangguang and Zou, Zi-Xin and Liang, Ding and Liu, Xihui and Cao, Yan-Pei and Sheng, Lu},
    title     = {{MIDI}: Multi-Instance Diffusion for Single Image to {3D} Scene Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {23646--23657},
}
S4-Driver: Scalable Self-Supervised Driving Multimodal Large Language Model with Spatio-Temporal Visual Representation: Yichen Xie,

Runsheng Xu,

Tong He,

Jyh-Jing Hwang,

Katie Luo,

Jingwei Ji,

Hubert Lin,

Letian Chen,

Yiren Lu,

Zhaoqi Leng,

Dragomir Anguelov,

Mingxing Tan; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_CVPR,
    author    = {Xie, Yichen and Xu, Runsheng and He, Tong and Hwang, Jyh-Jing and Luo, Katie and Ji, Jingwei and Lin, Hubert and Chen, Letian and Lu, Yiren and Leng, Zhaoqi and Anguelov, Dragomir and Tan, Mingxing},
    title     = {{S4-Driver}: Scalable Self-Supervised Driving Multimodal Large Language Model with Spatio-Temporal Visual Representation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {1622--1632},
}
Extrapolating and Decoupling Image-to-Video Generation Models: Motion Modeling is Easier Than You Think: Jie Tian,

Xiaoye Qu,

Zhenyi Lu,

Wei Wei,

Sichen Liu,

Yu Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_CVPR,
    author    = {Tian, Jie and Qu, Xiaoye and Lu, Zhenyi and Wei, Wei and Liu, Sichen and Cheng, Yu},
    title     = {Extrapolating and Decoupling Image-to-Video Generation Models: Motion Modeling is Easier Than You Think},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {12512--12521},
}
FreeCloth: Free-form Generation Enhances Challenging Clothed Human Modeling: Hang Ye,

Xiaoxuan Ma,

Hai Ci,

Wentao Zhu,

Yizhou Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR,
    author    = {Ye, Hang and Ma, Xiaoxuan and Ci, Hai and Zhu, Wentao and Wang, Yizhou},
    title     = {{FreeCloth}: Free-form Generation Enhances Challenging Clothed Human Modeling},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
    month     = jun,
    year      = {2025},
    pages     = {15987--15997},
}
ABC-Former: Auxiliary Bimodal Cross-domain Transformer with Interactive Channel Attention for White Balance: Yu-Cheng Chiu,

Guan-Rong Chen,

Zihao Chen,

Yan-Tsung Peng; [pdf] [supp]
[bibtex]
@InProceedings{Chiu_2025_CVPR, author = {Chiu, Yu-Cheng and Chen, Guan-Rong and Chen, Zihao and Peng, Yan-Tsung}, title = {{ABC-Former}: Auxiliary Bimodal Cross-domain Transformer with Interactive Channel Attention for White Balance}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21258--21266} }
Science-T2I: Addressing Scientific Illusions in Image Synthesis: Jialuo Li,

Wenhao Chai,

Xingyu Fu,

Haiyang Xu,

Saining Xie; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Jialuo and Chai, Wenhao and Fu, Xingyu and Xu, Haiyang and Xie, Saining}, title = {{Science-T2I}: Addressing Scientific Illusions in Image Synthesis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2734--2744} }
Fingerprinting Denoising Diffusion Probabilistic Models: Huan Teng,

Yuhui Quan,

Chengyu Wang,

Jun Huang,

Hui Ji; [pdf] [supp]
[bibtex]
@InProceedings{Teng_2025_CVPR, author = {Teng, Huan and Quan, Yuhui and Wang, Chengyu and Huang, Jun and Ji, Hui}, title = {Fingerprinting Denoising Diffusion Probabilistic Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {28811--28820} }
MoST: Efficient Monarch Sparse Tuning for 3D Representation Learning: Xu Han,

Yuan Tang,

Jinfeng Xu,

Xianzhi Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR, author = {Han, Xu and Tang, Yuan and Xu, Jinfeng and Li, Xianzhi}, title = {{MoST}: Efficient {Monarch} Sparse Tuning for {3D} Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6584--6594} }
Re-thinking Temporal Search for Long-Form Video Understanding: Jinhui Ye,

Zihan Wang,

Haosen Sun,

Keshigeyan Chandrasegaran,

Zane Durante,

Cristobal Eyzaguirre,

Yonatan Bisk,

Juan Carlos Niebles,

Ehsan Adeli,

Li Fei-Fei,

Jiajun Wu,

Manling Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR, author = {Ye, Jinhui and Wang, Zihan and Sun, Haosen and Chandrasegaran, Keshigeyan and Durante, Zane and Eyzaguirre, Cristobal and Bisk, Yonatan and Niebles, Juan Carlos and Adeli, Ehsan and Fei-Fei, Li and Wu, Jiajun and Li, Manling}, title = {Re-thinking Temporal Search for Long-Form Video Understanding}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8579--8591} }
InstanceGaussian: Appearance-Semantic Joint Gaussian Representation for 3D Instance-Level Perception: Haijie Li,

Yanmin Wu,

Jiarui Meng,

Qiankun Gao,

Zhiyao Zhang,

Ronggang Wang,

Jian Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Haijie and Wu, Yanmin and Meng, Jiarui and Gao, Qiankun and Zhang, Zhiyao and Wang, Ronggang and Zhang, Jian}, title = {{InstanceGaussian}: Appearance-Semantic Joint {Gaussian} Representation for {3D} Instance-Level Perception}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14078--14088} }
When Domain Generalization meets Generalized Category Discovery: An Adaptive Task-Arithmetic Driven Approach: Vaibhav Rathore,

Shubhranil B,

Saikat Dutta,

Sarthak Mehrotra,

Zsolt Kira,

Biplab Banerjee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rathore_2025_CVPR, author = {Rathore, Vaibhav and B, Shubhranil and Dutta, Saikat and Mehrotra, Sarthak and Kira, Zsolt and Banerjee, Biplab}, title = {When Domain Generalization meets Generalized Category Discovery: An Adaptive Task-Arithmetic Driven Approach}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4905--4915} }
CSC-PA: Cross-image Semantic Correlation via Prototype Attentions for Single-network Semi-supervised Breast Tumor Segmentation: Zhenhui Ding,

Guilian Chen,

Qin Zhang,

Huisi Wu,

Jing Qin; [pdf] [supp]
[bibtex]
@InProceedings{Ding_2025_CVPR, author = {Ding, Zhenhui and Chen, Guilian and Zhang, Qin and Wu, Huisi and Qin, Jing}, title = {{CSC-PA}: Cross-image Semantic Correlation via Prototype Attentions for Single-network Semi-supervised Breast Tumor Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15632--15641} }
BIP3D: Bridging 2D Images and 3D Perception for Embodied Intelligence: Xuewu Lin,

Tianwei Lin,

Lichao Huang,

Hongyu Xie,

Zhizhong Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR, author = {Lin, Xuewu and Lin, Tianwei and Huang, Lichao and Xie, Hongyu and Su, Zhizhong}, title = {{BIP3D}: Bridging {2D} Images and {3D} Perception for Embodied Intelligence}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9007--9016} }
Query Efficient Black-Box Visual Prompting with Subspace Learning: Zhaogeng Liu,

Haozhen Zhang,

Hualin Zhang,

Xingchen Li,

Wanli Shi,

Bin Gu,

Yi Chang; [pdf]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Zhaogeng and Zhang, Haozhen and Zhang, Hualin and Li, Xingchen and Shi, Wanli and Gu, Bin and Chang, Yi}, title = {Query Efficient Black-Box Visual Prompting with Subspace Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4322--4331} }
VisionPAD: A Vision-Centric Pre-training Paradigm for Autonomous Driving: Haiming Zhang,

Wending Zhou,

Yiyao Zhu,

Xu Yan,

Jiantao Gao,

Dongfeng Bai,

Yingjie Cai,

Bingbing Liu,

Shuguang Cui,

Zhen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Haiming and Zhou, Wending and Zhu, Yiyao and Yan, Xu and Gao, Jiantao and Bai, Dongfeng and Cai, Yingjie and Liu, Bingbing and Cui, Shuguang and Li, Zhen}, title = {{VisionPAD}: A Vision-Centric Pre-training Paradigm for Autonomous Driving}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {17165--17175} }
Detecting Adversarial Data Using Perturbation Forgery: Qian Wang,

Chen Li,

Yuchen Luo,

Hefei Ling,

Shijuan Huang,

Ruoxi Jia,

Ning Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Qian and Li, Chen and Luo, Yuchen and Ling, Hefei and Huang, Shijuan and Jia, Ruoxi and Yu, Ning}, title = {Detecting Adversarial Data Using Perturbation Forgery}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13917--13926} }
CoA: Towards Real Image Dehazing via Compression-and-Adaptation: Long Ma,

Yuxin Feng,

Yan Zhang,

Jinyuan Liu,

Weimin Wang,

Guang-Yong Chen,

Chengpei Xu,

Zhuo Su; [pdf] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR, author = {Ma, Long and Feng, Yuxin and Zhang, Yan and Liu, Jinyuan and Wang, Weimin and Chen, Guang-Yong and Xu, Chengpei and Su, Zhuo}, title = {{CoA}: Towards Real Image Dehazing via Compression-and-Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {11197--11206} }
NightAdapter: Learning a Frequency Adapter for Generalizable Night-time Scene Segmentation: Qi Bi,

Jingjun Yi,

Huimin Huang,

Hao Zheng,

Haolan Zhan,

Yawen Huang,

Yuexiang Li,

Xian Wu,

Yefeng Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Bi_2025_CVPR, author = {Bi, Qi and Yi, Jingjun and Huang, Huimin and Zheng, Hao and Zhan, Haolan and Huang, Yawen and Li, Yuexiang and Wu, Xian and Zheng, Yefeng}, title = {{NightAdapter}: Learning a Frequency Adapter for Generalizable Night-time Scene Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23838--23849} }
UMFN: Unified Multi-Domain Face Normalization for Joint Cross-domain Prototype Learning and Heterogeneous Face Recognition: Meng Pang,

Wenjun Zhang,

Nanrun Zhou,

Shengbo Chen,

Hong Rao; [pdf]
[bibtex]
@InProceedings{Pang_2025_CVPR, author = {Pang, Meng and Zhang, Wenjun and Zhou, Nanrun and Chen, Shengbo and Rao, Hong}, title = {{UMFN}: Unified Multi-Domain Face Normalization for Joint Cross-domain Prototype Learning and Heterogeneous Face Recognition}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29299--29308} }
TopV: Compatible Token Pruning with Inference Time Optimization for Fast and Low-Memory Multimodal Vision Language Model: Cheng Yang,

Yang Sui,

Jinqi Xiao,

Lingyi Huang,

Yu Gong,

Chendi Li,

Jinghua Yan,

Yu Bai,

Ponnuswamy Sadayappan,

Xia Hu,

Bo Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR, author = {Yang, Cheng and Sui, Yang and Xiao, Jinqi and Huang, Lingyi and Gong, Yu and Li, Chendi and Yan, Jinghua and Bai, Yu and Sadayappan, Ponnuswamy and Hu, Xia and Yuan, Bo}, title = {{TopV}: Compatible Token Pruning with Inference Time Optimization for Fast and Low-Memory Multimodal Vision Language Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19803--19813} }
Improving Autoregressive Visual Generation with Cluster-Oriented Token Prediction: Teng Hu,

Jiangning Zhang,

Ran Yi,

Jieyu Weng,

Yabiao Wang,

Xianfang Zeng,

Zhucun Xue,

Lizhuang Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR, author = {Hu, Teng and Zhang, Jiangning and Yi, Ran and Weng, Jieyu and Wang, Yabiao and Zeng, Xianfang and Xue, Zhucun and Ma, Lizhuang}, title = {Improving Autoregressive Visual Generation with Cluster-Oriented Token Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9351--9360} }
Learned Binocular-Encoding Optics for RGBD Imaging Using Joint Stereo and Focus Cues: Yuhui Liu,

Liangxun Ou,

Qiang Fu,

Hadi Amata,

Wolfgang Heidrich,

Yifan Peng; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Yuhui and Ou, Liangxun and Fu, Qiang and Amata, Hadi and Heidrich, Wolfgang and Peng, Yifan}, title = {Learned Binocular-Encoding Optics for {RGBD} Imaging Using Joint Stereo and Focus Cues}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15833--15842} }
Dual-view X-ray Detection: Can AI Detect Prohibited Items from Dual-view X-ray Images like Humans?: Renshuai Tao,

Haoyu Wang,

Yuzhe Guo,

Hairong Chen,

Li Zhang,

Xianglong Liu,

Yunchao Wei,

Yao Zhao; [pdf]
[bibtex]
@InProceedings{Tao_2025_CVPR, author = {Tao, Renshuai and Wang, Haoyu and Guo, Yuzhe and Chen, Hairong and Zhang, Li and Liu, Xianglong and Wei, Yunchao and Zhao, Yao}, title = {Dual-view {X-ray} Detection: Can {AI} Detect Prohibited Items from Dual-view {X-ray} Images like Humans?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {10338--10347} }
LUCAS: Layered Universal Codec Avatars: Di Liu,

Teng Deng,

Giljoo Nam,

Yu Rong,

Stanislav Pidhorskyi,

Junxuan Li,

Jason Saragih,

Dimitris N. Metaxas,

Chen Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Di and Deng, Teng and Nam, Giljoo and Rong, Yu and Pidhorskyi, Stanislav and Li, Junxuan and Saragih, Jason and Metaxas, Dimitris N. and Cao, Chen}, title = {{LUCAS}: Layered Universal Codec Avatars}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21127--21137} }
MobilePortrait: Real-Time One-Shot Neural Head Avatars on Mobile Devices: Jianwen Jiang,

Gaojie Lin,

Zhengkun Rong,

Chao Liang,

Yongming Zhu,

Jiaqi Yang,

Tianyun Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR, author = {Jiang, Jianwen and Lin, Gaojie and Rong, Zhengkun and Liang, Chao and Zhu, Yongming and Yang, Jiaqi and Zhong, Tianyun}, title = {{MobilePortrait}: Real-Time One-Shot Neural Head Avatars on Mobile Devices}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15920--15929} }
D^3: Scaling Up Deepfake Detection by Learning from Discrepancy: Yongqi Yang,

Zhihao Qian,

Ye Zhu,

Olga Russakovsky,

Yu Wu; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR, author = {Yang, Yongqi and Qian, Zhihao and Zhu, Ye and Russakovsky, Olga and Wu, Yu}, title = {{$D^3$}: Scaling Up Deepfake Detection by Learning from Discrepancy}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23850--23859} }
Jailbreaking the Non-Transferable Barrier via Test-Time Data Disguising: Yongli Xiang,

Ziming Hong,

Lina Yao,

Dadong Wang,

Tongliang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2025_CVPR, author = {Xiang, Yongli and Hong, Ziming and Yao, Lina and Wang, Dadong and Liu, Tongliang}, title = {Jailbreaking the Non-Transferable Barrier via Test-Time Data Disguising}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30671--30681} }
Light3R-SfM: Towards Feed-forward Structure-from-Motion: Sven Elflein,

Qunjie Zhou,

Laura Leal-Taixé; [pdf] [supp]
[bibtex]
@InProceedings{Elflein_2025_CVPR, author = {Elflein, Sven and Zhou, Qunjie and Leal-Taix{\'e}, Laura}, title = {{Light3R-SfM}: Towards Feed-forward Structure-from-Motion}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16774--16784} }
Robotic Visual Instruction: Yanbang Li,

Ziyang Gong,

Haoyang Li,

Xiaoqi Huang,

Haolan Kang,

Guangping Bai,

Xianzheng Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Yanbang and Gong, Ziyang and Li, Haoyang and Huang, Xiaoqi and Kang, Haolan and Bai, Guangping and Ma, Xianzheng}, title = {Robotic Visual Instruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {12155--12165} }
Solving Instance Detection from an Open-World Perspective: Qianqian Shen,

Yunhan Zhao,

Nahyun Kwon,

Jeeeun Kim,

Yanan Li,

Shu Kong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2025_CVPR, author = {Shen, Qianqian and Zhao, Yunhan and Kwon, Nahyun and Kim, Jeeeun and Li, Yanan and Kong, Shu}, title = {Solving Instance Detection from an Open-World Perspective}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {9901--9910} }
Percept, Memory, and Imagine: World Feature Simulating for Open-Domain Unknown Object Detection: Aming Wu,

Cheng Deng; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR, author = {Wu, Aming and Deng, Cheng}, title = {Percept, Memory, and Imagine: World Feature Simulating for Open-Domain Unknown Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4682--4691} }
Efficient Depth Estimation for Unstable Stereo Camera Systems on AR Glasses: Yongfan Liu,

Hyoukjun Kwon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Yongfan and Kwon, Hyoukjun}, title = {Efficient Depth Estimation for Unstable Stereo Camera Systems on {AR} Glasses}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {6252--6261} }
3D-GRAND: A Million-Scale Dataset for 3D-LLMs with Better Grounding and Less Hallucination: Jianing Yang,

Xuweiyi Chen,

Nikhil Madaan,

Madhavan Iyengar,

Shengyi Qian,

David F. Fouhey,

Joyce Chai; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR, author = {Yang, Jianing and Chen, Xuweiyi and Madaan, Nikhil and Iyengar, Madhavan and Qian, Shengyi and Fouhey, David F. and Chai, Joyce}, title = {{3D-GRAND}: A Million-Scale Dataset for {3D-LLMs} with Better Grounding and Less Hallucination}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29501--29512} }
LiDAR-RT: Gaussian-based Ray Tracing for Dynamic LiDAR Re-simulation: Chenxu Zhou,

Lvchang Fu,

Sida Peng,

Yunzhi Yan,

Zhanhua Zhang,

Yong Chen,

Jiazhi Xia,

Xiaowei Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR, author = {Zhou, Chenxu and Fu, Lvchang and Peng, Sida and Yan, Yunzhi and Zhang, Zhanhua and Chen, Yong and Xia, Jiazhi and Zhou, Xiaowei}, title = {{LiDAR-RT}: {Gaussian}-based Ray Tracing for Dynamic {LiDAR} Re-simulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {1538--1548} }
Generative Zero-Shot Composed Image Retrieval: Lan Wang,

Wei Ao,

Vishnu Naresh Boddeti,

Ser-Nam Lim; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Lan and Ao, Wei and Boddeti, Vishnu Naresh and Lim, Ser-Nam}, title = {Generative Zero-Shot Composed Image Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {29690--29700} }
Large-Scale Text-to-Image Model with Inpainting is a Zero-Shot Subject-Driven Image Generator: Chaehun Shin,

Jooyoung Choi,

Heeseung Kim,

Sungroh Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shin_2025_CVPR, author = {Shin, Chaehun and Choi, Jooyoung and Kim, Heeseung and Yoon, Sungroh}, title = {Large-Scale Text-to-Image Model with Inpainting is a Zero-Shot Subject-Driven Image Generator}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7986--7996} }
MASt3R-SLAM: Real-Time Dense SLAM with 3D Reconstruction Priors: Riku Murai,

Eric Dexheimer,

Andrew J. Davison; [pdf] [supp]
[bibtex]
@InProceedings{Murai_2025_CVPR, author = {Murai, Riku and Dexheimer, Eric and Davison, Andrew J.}, title = {{MASt3R-SLAM}: Real-Time Dense {SLAM} with {3D} Reconstruction Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {16695--16705} }
Flow-NeRF: Joint Learning of Geometry, Poses, and Dense Flow within Unified Neural Representations: Xunzhi Zheng,

Dan Xu; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_CVPR, author = {Zheng, Xunzhi and Xu, Dan}, title = {{Flow-NeRF}: Joint Learning of Geometry, Poses, and Dense Flow within Unified Neural Representations}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {993--1002} }
Viewpoint Rosetta Stone: Unlocking Unpaired Ego-Exo Videos for View-invariant Representation Learning: Mi Luo,

Zihui Xue,

Alex Dimakis,

Kristen Grauman; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2025_CVPR, author = {Luo, Mi and Xue, Zihui and Dimakis, Alex and Grauman, Kristen}, title = {Viewpoint {Rosetta Stone}: Unlocking Unpaired Ego-Exo Videos for View-invariant Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {15802--15812} }
Cross-modal Information Flow in Multimodal Large Language Models: Zhi Zhang,

Srishti Yadav,

Fengze Han,

Ekaterina Shutova; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Zhi and Yadav, Srishti and Han, Fengze and Shutova, Ekaterina}, title = {Cross-modal Information Flow in Multimodal Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {19781--19791} }
Consistent and Controllable Image Animation with Motion Diffusion Models: Xin Ma,

Yaohui Wang,

Gengyun Jia,

Xinyuan Chen,

Tien-Tsin Wong,

Yuan-Fang Li,

Cunjian Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR, author = {Ma, Xin and Wang, Yaohui and Jia, Gengyun and Chen, Xinyuan and Wong, Tien-Tsin and Li, Yuan-Fang and Chen, Cunjian}, title = {Consistent and Controllable Image Animation with Motion Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7288--7298} }
Towards Better Alignment: Training Diffusion Models with Reinforcement Learning Against Sparse Rewards: Zijing Hu,

Fengda Zhang,

Long Chen,

Kun Kuang,

Jiahui Li,

Kaifeng Gao,

Jun Xiao,

Xin Wang,

Wenwu Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR, author = {Hu, Zijing and Zhang, Fengda and Chen, Long and Kuang, Kun and Li, Jiahui and Gao, Kaifeng and Xiao, Jun and Wang, Xin and Zhu, Wenwu}, title = {Towards Better Alignment: Training Diffusion Models with Reinforcement Learning Against Sparse Rewards}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {23604--23614} }
Spatial457: A Diagnostic Benchmark for 6D Spatial Reasoning of Large Mutimodal Models: Xingrui Wang,

Wufei Ma,

Tiezheng Zhang,

Celso M de Melo,

Jieneng Chen,

Alan Yuille; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Xingrui and Ma, Wufei and Zhang, Tiezheng and de Melo, Celso M and Chen, Jieneng and Yuille, Alan}, title = {Spatial457: A Diagnostic Benchmark for {6D} Spatial Reasoning of Large Mutimodal Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24669--24679} }
Omnidirectional Multi-Object Tracking: Kai Luo,

Hao Shi,

Sheng Wu,

Fei Teng,

Mengfei Duan,

Chang Huang,

Yuhang Wang,

Kaiwei Wang,

Kailun Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_CVPR, author = {Luo, Kai and Shi, Hao and Wu, Sheng and Teng, Fei and Duan, Mengfei and Huang, Chang and Wang, Yuhang and Wang, Kaiwei and Yang, Kailun}, title = {Omnidirectional Multi-Object Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {21959--21969} }
Potential Field Based Deep Metric Learning: Shubhang Bhatnagar,

Narendra Ahuja; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bhatnagar_2025_CVPR, author = {Bhatnagar, Shubhang and Ahuja, Narendra}, title = {Potential Field Based Deep Metric Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25549--25559} }
Enhancing Vision-Language Compositional Understanding with Multimodal Synthetic Data: Haoxin Li,

Boyang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Haoxin and Li, Boyang}, title = {Enhancing Vision-Language Compositional Understanding with Multimodal Synthetic Data}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {24849--24861} }
Directional Label Diffusion Model for Learning from Noisy Labels: Senyu Hou,

Gaoxia Jiang,

Jia Zhang,

Shangrong Yang,

Husheng Guo,

Yaqing Guo,

Wenjian Wang; [pdf] [supp]
[bibtex]
@InProceedings{Hou_2025_CVPR, author = {Hou, Senyu and Jiang, Gaoxia and Zhang, Jia and Yang, Shangrong and Guo, Husheng and Guo, Yaqing and Wang, Wenjian}, title = {Directional Label Diffusion Model for Learning from Noisy Labels}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {25738--25748} }
AA-CLIP: Enhancing Zero-Shot Anomaly Detection via Anomaly-Aware CLIP: Wenxin Ma,

Xu Zhang,

Qingsong Yao,

Fenghe Tang,

Chenxu Wu,

Yingtai Li,

Rui Yan,

Zihang Jiang,

S. Kevin Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2025_CVPR, author = {Ma, Wenxin and Zhang, Xu and Yao, Qingsong and Tang, Fenghe and Wu, Chenxu and Li, Yingtai and Yan, Rui and Jiang, Zihang and Zhou, S. Kevin}, title = {{AA-CLIP}: Enhancing Zero-Shot Anomaly Detection via Anomaly-Aware {CLIP}}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4744--4754} }
HybridGS: Decoupling Transients and Statics with 2D and 3D Gaussian Splatting: Jingyu Lin,

Jiaqi Gu,

Lubin Fan,

Bojian Wu,

Yujing Lou,

Renjie Chen,

Ligang Liu,

Jieping Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR, author = {Lin, Jingyu and Gu, Jiaqi and Fan, Lubin and Wu, Bojian and Lou, Yujing and Chen, Renjie and Liu, Ligang and Ye, Jieping}, title = {{HybridGS}: Decoupling Transients and Statics with {2D} and {3D} {Gaussian} Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {788--797} }
Keyframe-Guided Creative Video Inpainting: Yuwei Guo,

Ceyuan Yang,

Anyi Rao,

Chenlin Meng,

Omer Bar-Tal,

Shuangrui Ding,

Maneesh Agrawala,

Dahua Lin,

Bo Dai; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_CVPR, author = {Guo, Yuwei and Yang, Ceyuan and Rao, Anyi and Meng, Chenlin and Bar-Tal, Omer and Ding, Shuangrui and Agrawala, Maneesh and Lin, Dahua and Dai, Bo}, title = {Keyframe-Guided Creative Video Inpainting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13009--13020} }
Channel Consistency Prior and Self-Reconstruction Strategy Based Unsupervised Image Deraining: Guanglu Dong,

Tianheng Zheng,

Yuanzhouhan Cao,

Linbo Qing,

Chao Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_CVPR, author = {Dong, Guanglu and Zheng, Tianheng and Cao, Yuanzhouhan and Qing, Linbo and Ren, Chao}, title = {Channel Consistency Prior and Self-Reconstruction Strategy Based Unsupervised Image Deraining}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {7469--7479} }
MobileMamba: Lightweight Multi-Receptive Visual Mamba Network: Haoyang He,

Jiangning Zhang,

Yuxuan Cai,

Hongxu Chen,

Xiaobin Hu,

Zhenye Gan,

Yabiao Wang,

Chengjie Wang,

Yunsheng Wu,

Lei Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_CVPR, author = {He, Haoyang and Zhang, Jiangning and Cai, Yuxuan and Chen, Hongxu and Hu, Xiaobin and Gan, Zhenye and Wang, Yabiao and Wang, Chengjie and Wu, Yunsheng and Xie, Lei}, title = {{MobileMamba}: Lightweight Multi-Receptive Visual {Mamba} Network}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4497--4507} }
EdgeTAM: On-Device Track Anything Model: Chong Zhou,

Chenchen Zhu,

Yunyang Xiong,

Saksham Suri,

Fanyi Xiao,

Lemeng Wu,

Raghuraman Krishnamoorthi,

Bo Dai,

Chen Change Loy,

Vikas Chandra,

Bilge Soran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR, author = {Zhou, Chong and Zhu, Chenchen and Xiong, Yunyang and Suri, Saksham and Xiao, Fanyi and Wu, Lemeng and Krishnamoorthi, Raghuraman and Dai, Bo and Loy, Chen Change and Chandra, Vikas and Soran, Bilge}, title = {{EdgeTAM}: On-Device Track Anything Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {13832--13842} }
SimLTD: Simple Supervised and Semi-Supervised Long-Tailed Object Detection: Phi Vu Tran; [pdf] [arXiv]
[bibtex]
@InProceedings{Tran_2025_CVPR, author = {Tran, Phi Vu}, title = {{SimLTD}: Simple Supervised and Semi-Supervised Long-Tailed Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {4672--4681} }
EarthDial: Turning Multi-sensory Earth Observations to Interactive Dialogues: Sagar Soni,

Akshay Dudhane,

Hiyam Debary,

Mustansar Fiaz,

Muhammad Akhtar Munir,

Muhammad Sohail Danish,

Paolo Fraccaro,

Campbell D Watson,

Levente J Klein,

Fahad Shahbaz Khan,

Salman Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Soni_2025_CVPR, author = {Soni, Sagar and Dudhane, Akshay and Debary, Hiyam and Fiaz, Mustansar and Munir, Muhammad Akhtar and Danish, Muhammad Sohail and Fraccaro, Paolo and Watson, Campbell D and Klein, Levente J and Khan, Fahad Shahbaz and Khan, Salman}, title = {{EarthDial}: Turning Multi-sensory {Earth} Observations to Interactive Dialogues}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {14303--14313} }
Learning Endogenous Attention for Incremental Object Detection: Xiang Song,

Yuhang He,

Jingyuan Li,

Qiang Wang,

Yihong Gong; [pdf] [supp]
[bibtex]
@InProceedings{Song_2025_CVPR, author = {Song, Xiang and He, Yuhang and Li, Jingyuan and Wang, Qiang and Gong, Yihong}, title = {Learning Endogenous Attention for Incremental Object Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {30354--30364} }
StarGen: A Spatiotemporal Autoregression Framework with Video Diffusion Model for Scalable and Controllable Scene Generation: Shangjin Zhai,

Zhichao Ye,

Jialin Liu,

Weijian Xie,

Jiaqi Hu,

Zhen Peng,

Hua Xue,

Danpeng Chen,

Xiaomeng Wang,

Lei Yang,

Nan Wang,

Haomin Liu,

Guofeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhai_2025_CVPR, author = {Zhai, Shangjin and Ye, Zhichao and Liu, Jialin and Xie, Weijian and Hu, Jiaqi and Peng, Zhen and Xue, Hua and Chen, Danpeng and Wang, Xiaomeng and Yang, Lei and Wang, Nan and Liu, Haomin and Zhang, Guofeng}, title = {{StarGen}: A Spatiotemporal Autoregression Framework with Video Diffusion Model for Scalable and Controllable Scene Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {26822--26833} }
HyperSeg: Hybrid Segmentation Assistant with Fine-grained Visual Perceiver: Cong Wei,

Yujie Zhong,

Haoxian Tan,

Yong Liu,

Jie Hu,

Dengjie Li,

Zheng Zhao,

Yujiu Yang; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_CVPR, author = {Wei, Cong and Zhong, Yujie and Tan, Haoxian and Liu, Yong and Hu, Jie and Li, Dengjie and Zhao, Zheng and Yang, Yujiu}, title = {{HyperSeg}: Hybrid Segmentation Assistant with Fine-grained Visual Perceiver}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {8931--8941} }
Diffusion-based Event Generation for High-Quality Image Deblurring: Xinan Xie,

Qing Zhang,

Wei-Shi Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2025_CVPR, author = {Xie, Xinan and Zhang, Qing and Zheng, Wei-Shi}, title = {Diffusion-based Event Generation for High-Quality Image Deblurring}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {2194--2203} }
Video Summarization with Large Language Models: Min Jung Lee,

Dayoung Gong,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR, author = {Lee, Min Jung and Gong, Dayoung and Cho, Minsu}, title = {Video Summarization with Large Language Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = jun, year = {2025}, pages = {18981--18991} }
Sketchtopia: A Dataset and Foundational Agents for Benchmarking Asynchronous Multimodal Communication with Iconic Feedback: Mohd Hozaifa Khan,

Ravi Kiran Sarvadevabhatla; [pdf] [supp]
[bibtex]
@InProceedings{Khan_2025_CVPR,
  author    = {Khan, Mohd Hozaifa and Sarvadevabhatla, Ravi Kiran},
  title     = {{Sketchtopia}: A Dataset and Foundational Agents for Benchmarking Asynchronous Multimodal Communication with Iconic Feedback},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {18176--18186},
}
Consistency-aware Self-Training for Iterative-based Stereo Matching: Jingyi Zhou,

Peng Ye,

Haoyu Zhang,

Jiakang Yuan,

Rao Qiang,

Liu YangChenXu,

Wu Cailin,

Feng Xu,

Tao Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Jingyi and Ye, Peng and Zhang, Haoyu and Yuan, Jiakang and Qiang, Rao and YangChenXu, Liu and Cailin, Wu and Xu, Feng and Chen, Tao},
  title     = {Consistency-aware Self-Training for Iterative-based Stereo Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16641--16650},
}
MV-MATH: Evaluating Multimodal Math Reasoning in Multi-Visual Contexts: Peijie Wang,

Zhong-Zhi Li,

Fei Yin,

Dekang Ran,

Cheng-Lin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Peijie and Li, Zhong-Zhi and Yin, Fei and Ran, Dekang and Liu, Cheng-Lin},
  title     = {{MV-MATH}: Evaluating Multimodal Math Reasoning in Multi-Visual Contexts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {19541--19551},
}
Balanced Rate-Distortion Optimization in Learned Image Compression: Yichi Zhang,

Zhihao Duan,

Yuning Huang,

Fengqing Zhu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yichi and Duan, Zhihao and Huang, Yuning and Zhu, Fengqing},
  title     = {Balanced Rate-Distortion Optimization in Learned Image Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {2428--2438},
}
Bridge the Gap: From Weak to Full Supervision for Temporal Action Localization with PseudoFormer: Ziyi Liu,

Yangcen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Ziyi and Liu, Yangcen},
  title     = {Bridge the Gap: From Weak to Full Supervision for Temporal Action Localization with {PseudoFormer}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {8711--8720},
}
HomoGen: Enhanced Video Inpainting via Homography Propagation and Diffusion: Ding Ding,

Yueming Pan,

Ruoyu Feng,

Qi Dai,

Kai Qiu,

Jianmin Bao,

Chong Luo,

Zhenzhong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Ding_2025_CVPR,
  author    = {Ding, Ding and Pan, Yueming and Feng, Ruoyu and Dai, Qi and Qiu, Kai and Bao, Jianmin and Luo, Chong and Chen, Zhenzhong},
  title     = {{HomoGen}: Enhanced Video Inpainting via Homography Propagation and Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22953--22962},
}
Generalized Few-shot 3D Point Cloud Segmentation with Vision-Language Model: Zhaochong An,

Guolei Sun,

Yun Liu,

Runjia Li,

Junlin Han,

Ender Konukoglu,

Serge Belongie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2025_CVPR,
  author    = {An, Zhaochong and Sun, Guolei and Liu, Yun and Li, Runjia and Han, Junlin and Konukoglu, Ender and Belongie, Serge},
  title     = {Generalized Few-shot {3D} Point Cloud Segmentation with Vision-Language Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {16997--17007},
}
Do ImageNet-trained Models Learn Shortcuts? The Impact of Frequency Shortcuts on Generalization: Shunxin Wang,

Raymond Veldhuis,

Nicola Strisciuglio; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shunxin and Veldhuis, Raymond and Strisciuglio, Nicola},
  title     = {Do {ImageNet}-trained Models Learn Shortcuts? The Impact of Frequency Shortcuts on Generalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25198--25207},
}
HORP: Human-Object Relation Priors Guided HOI Detection: Pei Geng,

Jian Yang,

Shanshan Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Geng_2025_CVPR,
  author    = {Geng, Pei and Yang, Jian and Zhang, Shanshan},
  title     = {{HORP}: Human-Object Relation Priors Guided {HOI} Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25325--25335},
}
Building a Mind Palace: Structuring Environment-Grounded Semantic Graphs for Effective Long Video Analysis with LLMs: Zeyi Huang,

Yuyang Ji,

Xiaofang Wang,

Nikhil Mehta,

Tong Xiao,

Donghyun Lee,

Sigmund Vanvalkenburgh,

Shengxin Zha,

Bolin Lai,

Licheng Yu,

Ning Zhang,

Yong Jae Lee,

Miao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zeyi and Ji, Yuyang and Wang, Xiaofang and Mehta, Nikhil and Xiao, Tong and Lee, Donghyun and Vanvalkenburgh, Sigmund and Zha, Shengxin and Lai, Bolin and Yu, Licheng and Zhang, Ning and Lee, Yong Jae and Liu, Miao},
  title     = {Building a Mind Palace: Structuring Environment-Grounded Semantic Graphs for Effective Long Video Analysis with {LLMs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24169--24179},
}; Back