CVPR 2025 Open Access Repository

Papers

Back
Deterministic Image-to-Image Translation via Denoising Brownian Bridge Models with Dual Approximators: Bohan Xiao,

Peiyong Wang,

Qisheng He,

Ming Dong; [pdf]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Bohan and Wang, Peiyong and He, Qisheng and Dong, Ming},
  title     = {Deterministic Image-to-Image Translation via Denoising {Brownian} Bridge Models with Dual Approximators},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28232--28241},
}
Task Preference Optimization: Improving Multimodal Large Language Models with Vision Task Alignment: Ziang Yan,

Zhilin Li,

Yinan He,

Chenting Wang,

Kunchang Li,

Xinhao Li,

Xiangyu Zeng,

Zilei Wang,

Yali Wang,

Yu Qiao,

Limin Wang,

Yi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Ziang and Li, Zhilin and He, Yinan and Wang, Chenting and Li, Kunchang and Li, Xinhao and Zeng, Xiangyu and Wang, Zilei and Wang, Yali and Qiao, Yu and Wang, Limin and Wang, Yi},
  title     = {Task Preference Optimization: Improving Multimodal Large Language Models with Vision Task Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29880--29892},
}
Cross-modal Causal Relation Alignment for Video Question Grounding: Weixing Chen,

Yang Liu,

Binglin Chen,

Jiandong Su,

Yongsen Zheng,

Liang Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Weixing and Liu, Yang and Chen, Binglin and Su, Jiandong and Zheng, Yongsen and Lin, Liang},
  title     = {Cross-modal Causal Relation Alignment for Video Question Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24087--24096},
}
Diffusion Renderer: Neural Inverse and Forward Rendering with Video Diffusion Models: Ruofan Liang,

Zan Gojcic,

Huan Ling,

Jacob Munkberg,

Jon Hasselgren,

Chih-Hao Lin,

Jun Gao,

Alexander Keller,

Nandita Vijaykumar,

Sanja Fidler,

Zian Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Ruofan and Gojcic, Zan and Ling, Huan and Munkberg, Jacob and Hasselgren, Jon and Lin, Chih-Hao and Gao, Jun and Keller, Alexander and Vijaykumar, Nandita and Fidler, Sanja and Wang, Zian},
  title     = {Diffusion Renderer: Neural Inverse and Forward Rendering with Video Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26069--26080},
}
Omni-Scene: Omni-Gaussian Representation for Ego-Centric Sparse-View Scene Reconstruction: Dongxu Wei,

Zhiqi Li,

Peidong Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Dongxu and Li, Zhiqi and Liu, Peidong},
  title     = {{Omni-Scene}: {Omni-Gaussian} Representation for Ego-Centric Sparse-View Scene Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22317--22327},
}
3DTopia-XL: Scaling High-quality 3D Asset Generation via Primitive Diffusion: Zhaoxi Chen,

Jiaxiang Tang,

Yuhao Dong,

Ziang Cao,

Fangzhou Hong,

Yushi Lan,

Tengfei Wang,

Haozhe Xie,

Tong Wu,

Shunsuke Saito,

Liang Pan,

Dahua Lin,

Ziwei Liu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zhaoxi and Tang, Jiaxiang and Dong, Yuhao and Cao, Ziang and Hong, Fangzhou and Lan, Yushi and Wang, Tengfei and Xie, Haozhe and Wu, Tong and Saito, Shunsuke and Pan, Liang and Lin, Dahua and Liu, Ziwei},
  title     = {{3DTopia-XL}: Scaling High-quality {3D} Asset Generation via Primitive Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26576--26586},
}
Missing Target-Relevant Information Prediction with World Model for Accurate Zero-Shot Composed Image Retrieval: Yuanmin Tang,

Jing Yu,

Keke Gai,

Jiamin Zhuang,

Gang Xiong,

Gaopeng Gou,

Qi Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Yuanmin and Yu, Jing and Gai, Keke and Zhuang, Jiamin and Xiong, Gang and Gou, Gaopeng and Wu, Qi},
  title     = {Missing Target-Relevant Information Prediction with World Model for Accurate Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24785--24795},
}
DiffSensei: Bridging Multi-Modal LLMs and Diffusion Models for Customized Manga Generation: Jianzong Wu,

Chao Tang,

Jingbo Wang,

Yanhong Zeng,

Xiangtai Li,

Yunhai Tong; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Jianzong and Tang, Chao and Wang, Jingbo and Zeng, Yanhong and Li, Xiangtai and Tong, Yunhai},
  title     = {{DiffSensei}: Bridging Multi-Modal {LLMs} and Diffusion Models for Customized Manga Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28684--28693},
}
Narrating the Video: Boosting Text-Video Retrieval via Comprehensive Utilization of Frame-Level Captions: Chan Hur,

Jeong-hun Hong,

Dong-hun Lee,

Dabin Kang,

Semin Myeong,

Sang-hyo Park,

Hyeyoung Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hur_2025_CVPR,
  author    = {Hur, Chan and Hong, Jeong-hun and Lee, Dong-hun and Kang, Dabin and Myeong, Semin and Park, Sang-hyo and Park, Hyeyoung},
  title     = {Narrating the Video: Boosting Text-Video Retrieval via Comprehensive Utilization of Frame-Level Captions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24077--24086},
}
CARL: A Framework for Equivariant Image Registration: Hastings Greer,

Lin Tian,

François-Xavier Vialard,

Roland Kwitt,

Raul San Jose Estepar,

Marc Niethammer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Greer_2025_CVPR,
  author    = {Greer, Hastings and Tian, Lin and Vialard, Fran{\c{c}}ois-Xavier and Kwitt, Roland and San Jose Estepar, Raul and Niethammer, Marc},
  title     = {{CARL}: A Framework for Equivariant Image Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26014--26023},
}
FlashGS: Efficient 3D Gaussian Splatting for Large-scale and High-resolution Rendering: Guofeng Feng,

Siyan Chen,

Rong Fu,

Zimu Liao,

Yi Wang,

Tao Liu,

Boni Hu,

Linning Xu,

Zhilin Pei,

Hengjie Li,

Xiuhong Li,

Ninghui Sun,

Xingcheng Zhang,

Bo Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Guofeng and Chen, Siyan and Fu, Rong and Liao, Zimu and Wang, Yi and Liu, Tao and Hu, Boni and Xu, Linning and Pei, Zhilin and Li, Hengjie and Li, Xiuhong and Sun, Ninghui and Zhang, Xingcheng and Dai, Bo},
  title     = {{FlashGS}: Efficient {3D} {Gaussian} Splatting for Large-scale and High-resolution Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26652--26662},
}
Chat2SVG: Vector Graphics Generation with Large Language Models and Image Diffusion Models: Ronghuan Wu,

Wanchao Su,

Jing Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Ronghuan and Su, Wanchao and Liao, Jing},
  title     = {{Chat2SVG}: Vector Graphics Generation with Large Language Models and Image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23690--23700},
}
Inference-Scale Complexity in ANN-SNN Conversion for High-Performance and Low-Power Applications: Tong Bu,

Maohua Li,

Zhaofei Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bu_2025_CVPR,
  author    = {Bu, Tong and Li, Maohua and Yu, Zhaofei},
  title     = {Inference-Scale Complexity in {ANN-SNN} Conversion for High-Performance and Low-Power Applications},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24387--24397},
}
MVDoppler-Pose: Multi-Modal Multi-View mmWave Sensing for Long-Distance Self-Occluded Human Walking Pose Estimation: Jaeho Choi,

Soheil Hor,

Shubo Yang,

Amin Arbabian; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2025_CVPR,
  author    = {Choi, Jaeho and Hor, Soheil and Yang, Shubo and Arbabian, Amin},
  title     = {{MVDoppler-Pose}: Multi-Modal Multi-View {mmWave} Sensing for Long-Distance Self-Occluded Human Walking Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27750--27759},
}
TopNet: Transformer-Efficient Occupancy Prediction Network for Octree-Structured Point Cloud Geometry Compression: Xinjie Wang,

Yifan Zhang,

Ting Liu,

Xinpu Liu,

Ke Xu,

Jianwei Wan,

Yulan Guo,

Hanyun Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xinjie and Zhang, Yifan and Liu, Ting and Liu, Xinpu and Xu, Ke and Wan, Jianwei and Guo, Yulan and Wang, Hanyun},
  title     = {{TopNet}: Transformer-Efficient Occupancy Prediction Network for Octree-Structured Point Cloud Geometry Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27305--27314},
}
Gain from Neighbors: Boosting Model Robustness in the Wild via Adversarial Perturbations Toward Neighboring Classes: Zhou Yang,

Mingtao Feng,

Tao Huang,

Fangfang Wu,

Weisheng Dong,

Xin Li,

Guangming Shi; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Zhou and Feng, Mingtao and Huang, Tao and Wu, Fangfang and Dong, Weisheng and Li, Xin and Shi, Guangming},
  title     = {Gain from Neighbors: Boosting Model Robustness in the Wild via Adversarial Perturbations Toward Neighboring Classes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25497--25507},
}
M^3-VOS: Multi-Phase, Multi-Transition, and Multi-Scenery Video Object Segmentation: Zixuan Chen,

Jiaxin Li,

Junxuan Liang,

Liming Tan,

Yejie Guo,

Cewu Lu,

Yong-Lu Li; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zixuan and Li, Jiaxin and Liang, Junxuan and Tan, Liming and Guo, Yejie and Lu, Cewu and Li, Yong-Lu},
  title     = {{M$^3$-VOS}: Multi-Phase, Multi-Transition, and Multi-Scenery Video Object Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29193--29202},
}
Everything to the Synthetic: Diffusion-driven Test-time Adaptation via Synthetic-Domain Alignment: Jiayi Guo,

Junhao Zhao,

Chaoqun Du,

Yulin Wang,

Chunjiang Ge,

Zanlin Ni,

Shiji Song,

Humphrey Shi,

Gao Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Jiayi and Zhao, Junhao and Du, Chaoqun and Wang, Yulin and Ge, Chunjiang and Ni, Zanlin and Song, Shiji and Shi, Humphrey and Huang, Gao},
  title     = {Everything to the Synthetic: Diffusion-driven Test-time Adaptation via Synthetic-Domain Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30503--30513},
}
Multi-Granularity Class Prototype Topology Distillation for Class-Incremental Source-Free Unsupervised Domain Adaptation: Peihua Deng,

Jiehua Zhang,

Xichun Sheng,

Chenggang Yan,

Yaoqi Sun,

Ying Fu,

Liang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Peihua and Zhang, Jiehua and Sheng, Xichun and Yan, Chenggang and Sun, Yaoqi and Fu, Ying and Li, Liang},
  title     = {Multi-Granularity Class Prototype Topology Distillation for Class-Incremental Source-Free Unsupervised Domain Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30566--30576},
}
A Polarization-Aided Transformer for Image Deblurring via Motion Vector Decomposition: Duosheng Chen,

Shihao Zhou,

Jinshan Pan,

Jinglei Shi,

Lishen Qu,

Jufeng Yang; [pdf]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Duosheng and Zhou, Shihao and Pan, Jinshan and Shi, Jinglei and Qu, Lishen and Yang, Jufeng},
  title     = {A Polarization-Aided Transformer for Image Deblurring via Motion Vector Decomposition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28061--28070},
}
CocoER: Aligning Multi-Level Feature by Competition and Coordination for Emotion Recognition: Xuli Shen,

Hua Cai,

Weilin Shen,

Qing Xu,

Dingding Yu,

Weifeng Ge,

Xiangyang Xue; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Xuli and Cai, Hua and Shen, Weilin and Xu, Qing and Yu, Dingding and Ge, Weifeng and Xue, Xiangyang},
  title     = {{CocoER}: Aligning Multi-Level Feature by Competition and Coordination for Emotion Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29591--29600},
}
Enhancing Creative Generation on Stable Diffusion-based Models: Jiyeon Han,

Dahee Kwon,

Gayoung Lee,

Junho Kim,

Jaesik Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Jiyeon and Kwon, Dahee and Lee, Gayoung and Kim, Junho and Choi, Jaesik},
  title     = {Enhancing Creative Generation on {Stable Diffusion}-based Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28609--28618},
}
Denoising Functional Maps: Diffusion Models for Shape Correspondence: Aleksei Zhuravlev,

Zorah Lähner,

Vladislav Golyanik; [pdf] [supp]
[bibtex]
@InProceedings{Zhuravlev_2025_CVPR,
  author    = {Zhuravlev, Aleksei and L{\"a}hner, Zorah and Golyanik, Vladislav},
  title     = {Denoising Functional Maps: Diffusion Models for Shape Correspondence},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26899--26909},
}
ProReflow: Progressive Reflow with Decomposed Velocity: Lei Ke,

Haohang Xu,

Xuefei Ning,

Yu Li,

Jiajun Li,

Haoling Li,

Yuxuan Lin,

Dongsheng Jiang,

Yujiu Yang,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2025_CVPR,
  author    = {Ke, Lei and Xu, Haohang and Ning, Xuefei and Li, Yu and Li, Jiajun and Li, Haoling and Lin, Yuxuan and Jiang, Dongsheng and Yang, Yujiu and Zhang, Linfeng},
  title     = {{ProReflow}: Progressive Reflow with Decomposed Velocity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28029--28038},
}
Devil is in the Detail: Towards Injecting Fine Details of Image Prompt in Image Generation via Conflict-free Guidance and Stratified Attention: Kyungmin Jo,

Jooyeol Yun,

Jaegul Choo; [pdf] [supp]
[bibtex]
@InProceedings{Jo_2025_CVPR,
  author    = {Jo, Kyungmin and Yun, Jooyeol and Choo, Jaegul},
  title     = {Devil is in the Detail: Towards Injecting Fine Details of Image Prompt in Image Generation via Conflict-free Guidance and Stratified Attention},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23595--23603},
}
MetaShadow: Object-Centered Shadow Detection, Removal, and Synthesis: Tianyu Wang,

Jianming Zhang,

Haitian Zheng,

Zhihong Ding,

Scott Cohen,

Zhe Lin,

Wei Xiong,

Chi-Wing Fu,

Luis Figueroa,

Soo Ye Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Tianyu and Zhang, Jianming and Zheng, Haitian and Ding, Zhihong and Cohen, Scott and Lin, Zhe and Xiong, Wei and Fu, Chi-Wing and Figueroa, Luis and Kim, Soo Ye},
  title     = {{MetaShadow}: Object-Centered Shadow Detection, Removal, and Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28252--28262},
}
TANGO: Training-free Embodied AI Agents for Open-world Tasks: Filippo Ziliotto,

Tommaso Campari,

Luciano Serafini,

Lamberto Ballan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ziliotto_2025_CVPR,
  author    = {Ziliotto, Filippo and Campari, Tommaso and Serafini, Luciano and Ballan, Lamberto},
  title     = {{TANGO}: Training-free Embodied {AI} Agents for Open-world Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24603--24613},
}
Stealthy Backdoor Attack in Self-Supervised Learning Vision Encoders for Large Vision Language Models: Zhaoyi Liu,

Huan Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhaoyi and Zhang, Huan},
  title     = {Stealthy Backdoor Attack in Self-Supervised Learning Vision Encoders for Large Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25060--25070},
}
SAM2-LOVE: Segment Anything Model 2 in Language-aided Audio-Visual Scenes: Yuji Wang,

Haoran Xu,

Yong Liu,

Jiaze Li,

Yansong Tang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuji and Xu, Haoran and Liu, Yong and Li, Jiaze and Tang, Yansong},
  title     = {{SAM2-LOVE}: {Segment Anything Model 2} in Language-aided Audio-Visual Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28932--28941},
}
GIVEPose: Gradual Intra-class Variation Elimination for RGB-based Category-Level Object Pose Estimation: Ziqin Huang,

Gu Wang,

Chenyangguang Zhang,

Ruida Zhang,

Xiu Li,

Xiangyang Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Ziqin and Wang, Gu and Zhang, Chenyangguang and Zhang, Ruida and Li, Xiu and Ji, Xiangyang},
  title     = {{GIVEPose}: Gradual Intra-class Variation Elimination for {RGB}-based Category-Level Object Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22055--22066},
}
Sketch Down the FLOPs: Towards Efficient Networks for Human Sketch: Aneeshan Sain,

Subhajit Maity,

Pinaki Nath Chowdhury,

Shubhadeep Koley,

Ayan Kumar Bhunia,

Yi-Zhe Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sain_2025_CVPR,
  author    = {Sain, Aneeshan and Maity, Subhajit and Chowdhury, Pinaki Nath and Koley, Shubhadeep and Bhunia, Ayan Kumar and Song, Yi-Zhe},
  title     = {Sketch Down the {FLOPs}: Towards Efficient Networks for Human Sketch},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28383--28393},
}
Rethinking Decoder Design: Improving Biomarker Segmentation Using Depth-to-Space Restoration and Residual Linear Attention: Saad Wazir,

Daeyoung Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wazir_2025_CVPR,
  author    = {Wazir, Saad and Kim, Daeyoung},
  title     = {Rethinking Decoder Design: Improving Biomarker Segmentation Using Depth-to-Space Restoration and Residual Linear Attention},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30861--30871},
}
SynTab-LLaVA: Enhancing Multimodal Table Understanding with Decoupled Synthesis: Bangbang Zhou,

Zuan Gao,

Zixiao Wang,

Boqiang Zhang,

Yuxin Wang,

Zhineng Chen,

Hongtao Xie; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Bangbang and Gao, Zuan and Wang, Zixiao and Zhang, Boqiang and Wang, Yuxin and Chen, Zhineng and Xie, Hongtao},
  title     = {{SynTab-LLaVA}: Enhancing Multimodal Table Understanding with Decoupled Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24796--24806},
}
Edit Away and My Face Will not Stay: Personal Biometric Defense against Malicious Generative Editing: Hanhui Wang,

Yihua Zhang,

Ruizheng Bai,

Yue Zhao,

Sijia Liu,

Zhengzhong Tu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Hanhui and Zhang, Yihua and Bai, Ruizheng and Zhao, Yue and Liu, Sijia and Tu, Zhengzhong},
  title     = {Edit Away and My Face Will not Stay: Personal Biometric Defense against Malicious Generative Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23806--23816},
}
Improving Accuracy and Calibration via Differentiated Deep Mutual Learning: Han Liu,

Peng Cui,

Bingning Wang,

Weipeng Chen,

Yupeng Zhang,

Jun Zhu,

Xiaolin Hu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Han and Cui, Peng and Wang, Bingning and Chen, Weipeng and Zhang, Yupeng and Zhu, Jun and Hu, Xiaolin},
  title     = {Improving Accuracy and Calibration via Differentiated Deep Mutual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25812--25821},
}
Infighting in the Dark: Multi-Label Backdoor Attack in Federated Learning: Ye Li,

Yanchao Zhao,

Chengcheng Zhu,

Jiale Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Ye and Zhao, Yanchao and Zhu, Chengcheng and Zhang, Jiale},
  title     = {Infighting in the Dark: Multi-Label Backdoor Attack in Federated Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25770--25779},
}
Tartan IMU: A Light Foundation Model for Inertial Positioning in Robotics: Shibo Zhao,

Sifan Zhou,

Raphael Blanchard,

Yuheng Qiu,

Wenshan Wang,

Sebastian Scherer; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Shibo and Zhou, Sifan and Blanchard, Raphael and Qiu, Yuheng and Wang, Wenshan and Scherer, Sebastian},
  title     = {{Tartan IMU}: A Light Foundation Model for Inertial Positioning in Robotics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22520--22529},
}
Event Ellipsometer: Event-based Mueller-Matrix Video Imaging: Ryota Maeda,

Yunseong Moon,

Seung-Hwan Baek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Maeda_2025_CVPR,
  author    = {Maeda, Ryota and Moon, Yunseong and Baek, Seung-Hwan},
  title     = {Event Ellipsometer: Event-based {Mueller}-Matrix Video Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21804--21813},
}
End-to-End HOI Reconstruction Transformer with Graph-based Encoding: Zhenrong Wang,

Qi Zheng,

Sihan Ma,

Maosheng Ye,

Yibing Zhan,

Dongjiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhenrong and Zheng, Qi and Ma, Sihan and Ye, Maosheng and Zhan, Yibing and Li, Dongjiang},
  title     = {End-to-End {HOI} Reconstruction Transformer with Graph-based Encoding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27706--27715},
}
Disco4D: Disentangled 4D Human Generation and Animation from a Single Image: Hui En Pang,

Shuai Liu,

Zhongang Cai,

Lei Yang,

Tianwei Zhang,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pang_2025_CVPR,
  author    = {Pang, Hui En and Liu, Shuai and Cai, Zhongang and Yang, Lei and Zhang, Tianwei and Liu, Ziwei},
  title     = {{Disco4D}: Disentangled {4D} Human Generation and Animation from a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26331--26344},
}
IDOL: Instant Photorealistic 3D Human Creation from a Single Image: Yiyu Zhuang,

Jiaxi Lv,

Hao Wen,

Qing Shuai,

Ailing Zeng,

Hao Zhu,

Shifeng Chen,

Yujiu Yang,

Xun Cao,

Wei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhuang_2025_CVPR,
  author    = {Zhuang, Yiyu and Lv, Jiaxi and Wen, Hao and Shuai, Qing and Zeng, Ailing and Zhu, Hao and Chen, Shifeng and Yang, Yujiu and Cao, Xun and Liu, Wei},
  title     = {{IDOL}: Instant Photorealistic {3D} Human Creation from a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26308--26319},
}
SketchVideo: Sketch-based Video Generation and Editing: Feng-Lin Liu,

Hongbo Fu,

Xintao Wang,

Weicai Ye,

Pengfei Wan,

Di Zhang,

Lin Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Feng-Lin and Fu, Hongbo and Wang, Xintao and Ye, Weicai and Wan, Pengfei and Zhang, Di and Gao, Lin},
  title     = {{SketchVideo}: Sketch-based Video Generation and Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23379--23390},
}
Taste More, Taste Better: Diverse Data and Strong Model Boost Semi-Supervised Crowd Counting: Maochen Yang,

Zekun Li,

Jian Zhang,

Lei Qi,

Yinghuan Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Maochen and Li, Zekun and Zhang, Jian and Qi, Lei and Shi, Yinghuan},
  title     = {Taste More, Taste Better: Diverse Data and Strong Model Boost Semi-Supervised Crowd Counting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24440--24451},
}
AnyDressing: Customizable Multi-Garment Virtual Dressing via Latent Diffusion Models: Xinghui Li,

Qichao Sun,

Pengze Zhang,

Fulong Ye,

Zhichao Liao,

Wanquan Feng,

Songtao Zhao,

Qian He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xinghui and Sun, Qichao and Zhang, Pengze and Ye, Fulong and Liao, Zhichao and Feng, Wanquan and Zhao, Songtao and He, Qian},
  title     = {{AnyDressing}: Customizable Multi-Garment Virtual Dressing via Latent Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23723--23733},
}
Latent Space Imaging: Matheus Souza,

Yidan Zheng,

Kaizhang Kang,

Yogeshwar Nath Mishra,

Qiang Fu,

Wolfgang Heidrich; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Souza_2025_CVPR,
  author    = {Souza, Matheus and Zheng, Yidan and Kang, Kaizhang and Mishra, Yogeshwar Nath and Fu, Qiang and Heidrich, Wolfgang},
  title     = {Latent Space Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28295--28305},
}
Balanced Direction from Multifarious Choices: Arithmetic Meta-Learning for Domain Generalization: Xiran Wang,

Jian Zhang,

Lei Qi,

Yinghuan Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xiran and Zhang, Jian and Qi, Lei and Shi, Yinghuan},
  title     = {Balanced Direction from Multifarious Choices: Arithmetic Meta-Learning for Domain Generalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30577--30587},
}
Anatomical Consistency and Adaptive Prior-informed Transformation for Multi-contrast MR Image Synthesis via Diffusion Model: Yejee Shin,

Yeeun Lee,

Hanbyol Jang,

Geonhui Son,

Hyeongyu Kim,

Dosik Hwang; [pdf] [supp]
[bibtex]
@InProceedings{Shin_2025_CVPR,
  author    = {Shin, Yejee and Lee, Yeeun and Jang, Hanbyol and Son, Geonhui and Kim, Hyeongyu and Hwang, Dosik},
  title     = {Anatomical Consistency and Adaptive Prior-informed Transformation for Multi-contrast {MR} Image Synthesis via Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30918--30927},
}
SeCap: Self-Calibrating and Adaptive Prompts for Cross-view Person Re-Identification in Aerial-Ground Networks: Shining Wang,

Yunlong Wang,

Ruiqi Wu,

Bingliang Jiao,

Wenxuan Wang,

Peng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shining and Wang, Yunlong and Wu, Ruiqi and Jiao, Bingliang and Wang, Wenxuan and Wang, Peng},
  title     = {{SeCap}: Self-Calibrating and Adaptive Prompts for Cross-view Person Re-Identification in Aerial-Ground Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22119--22128},
}
Don't Shake the Wheel: Momentum-Aware Planning in End-to-End Autonomous Driving: Ziying Song,

Caiyan Jia,

Lin Liu,

Hongyu Pan,

Yongchang Zhang,

Junming Wang,

Xingyu Zhang,

Shaoqing Xu,

Lei Yang,

Yadan Luo; [pdf] [supp]
[bibtex]
@InProceedings{Song_2025_CVPR,
  author    = {Song, Ziying and Jia, Caiyan and Liu, Lin and Pan, Hongyu and Zhang, Yongchang and Wang, Junming and Zhang, Xingyu and Xu, Shaoqing and Yang, Lei and Luo, Yadan},
  title     = {Don't Shake the Wheel: Momentum-Aware Planning in End-to-End Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22432--22441},
}
Neural Motion Simulator Pushing the Limit of World Models in Reinforcement Learning: Chenjie Hao,

Weyl Lu,

Yifan Xu,

Yubei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hao_2025_CVPR,
  author    = {Hao, Chenjie and Lu, Weyl and Xu, Yifan and Chen, Yubei},
  title     = {Neural Motion Simulator Pushing the Limit of World Models in Reinforcement Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27608--27617},
}
Adversarial Diffusion Compression for Real-World Image Super-Resolution: Bin Chen,

Gehui Li,

Rongyuan Wu,

Xindong Zhang,

Jie Chen,

Jian Zhang,

Lei Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Bin and Li, Gehui and Wu, Rongyuan and Zhang, Xindong and Chen, Jie and Zhang, Jian and Zhang, Lei},
  title     = {Adversarial Diffusion Compression for Real-World Image Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28208--28220},
}
DiSciPLE: Learning Interpretable Programs for Scientific Visual Discovery: Utkarsh Mall,

Cheng Perng Phoo,

Mia Chiquier,

Bharath Hariharan,

Kavita Bala,

Carl Vondrick; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mall_2025_CVPR,
  author    = {Mall, Utkarsh and Phoo, Cheng Perng and Chiquier, Mia and Hariharan, Bharath and Bala, Kavita and Vondrick, Carl},
  title     = {{DiSciPLE}: Learning Interpretable Programs for Scientific Visual Discovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29258--29267},
}
SOLAMI: Social Vision-Language-Action Modeling for Immersive Interaction with 3D Autonomous Characters: Jianping Jiang,

Weiye Xiao,

Zhengyu Lin,

Huaizhong Zhang,

Tianxiang Ren,

Yang Gao,

Zhiqian Lin,

Zhongang Cai,

Lei Yang,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Jianping and Xiao, Weiye and Lin, Zhengyu and Zhang, Huaizhong and Ren, Tianxiang and Gao, Yang and Lin, Zhiqian and Cai, Zhongang and Yang, Lei and Liu, Ziwei},
  title     = {{SOLAMI}: Social Vision-Language-Action Modeling for Immersive Interaction with {3D} Autonomous Characters},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26887--26898},
}
EntropyMark: Towards More Harmless Backdoor Watermark via Entropy-based Constraint for Open-source Dataset Copyright Protection: Ming Sun,

Rui Wang,

Zixuan Zhu,

Lihua Jing,

Yuanfang Guo; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Ming and Wang, Rui and Zhu, Zixuan and Jing, Lihua and Guo, Yuanfang},
  title     = {{EntropyMark}: Towards More Harmless Backdoor Watermark via Entropy-based Constraint for Open-source Dataset Copyright Protection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30692--30701},
}
Adaptive Markup Language Generation for Contextually-Grounded Visual Document Understanding: Han Xiao,

Yina Xie,

Guanxin Tan,

Yinghao Chen,

Rui Hu,

Ke Wang,

Aojun Zhou,

Hao Li,

Hao Shao,

Xudong Lu,

Peng Gao,

Yafei Wen,

Xiaoxin Chen,

Shuai Ren,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Han and Xie, Yina and Tan, Guanxin and Chen, Yinghao and Hu, Rui and Wang, Ke and Zhou, Aojun and Li, Hao and Shao, Hao and Lu, Xudong and Gao, Peng and Wen, Yafei and Chen, Xiaoxin and Ren, Shuai and Li, Hongsheng},
  title     = {Adaptive Markup Language Generation for Contextually-Grounded Visual Document Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29558--29568},
}
Towards Universal AI-Generated Image Detection by Variational Information Bottleneck Network: Haifeng Zhang,

Qinghui He,

Xiuli Bi,

Weisheng Li,

Bo Liu,

Bin Xiao; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Haifeng and He, Qinghui and Bi, Xiuli and Li, Weisheng and Liu, Bo and Xiao, Bin},
  title     = {Towards Universal {AI}-Generated Image Detection by Variational Information Bottleneck Network},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23828--23837},
}
HSI: A Holistic Style Injector for Arbitrary Style Transfer: Shuhao Zhang,

Hui Kang,

Yang Liu,

Fang Mei,

Hongjuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Shuhao and Kang, Hui and Liu, Yang and Mei, Fang and Li, Hongjuan},
  title     = {{HSI}: A Holistic Style Injector for Arbitrary Style Transfer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23433--23442},
}
V2V3D: View-to-View Denoised 3D Reconstruction for Light Field Microscopy: Jiayin Zhao,

Zhenqi Fu,

Tao Yu,

Hui Qiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Jiayin and Fu, Zhenqi and Yu, Tao and Qiao, Hui},
  title     = {{V2V3D}: View-to-View Denoised {3D} Reconstruction for Light Field Microscopy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26451--26461},
}
Splatter-360: Generalizable 360 Gaussian Splatting for Wide-baseline Panoramic Images: Zheng Chen,

Chenming Wu,

Zhelun Shen,

Chen Zhao,

Weicai Ye,

Haocheng Feng,

Errui Ding,

Song-Hai Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zheng and Wu, Chenming and Shen, Zhelun and Zhao, Chen and Ye, Weicai and Feng, Haocheng and Ding, Errui and Zhang, Song-Hai},
  title     = {{Splatter-360}: Generalizable 360 {Gaussian} Splatting for Wide-baseline Panoramic Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21590--21599},
}
Towards Understanding How Knowledge Evolves in Large Vision-Language Models: Sudong Wang,

Yunjian Zhang,

Yao Zhu,

Jianing Li,

Zizhe Wang,

Yanwei Liu,

Xiangyang Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Sudong and Zhang, Yunjian and Zhu, Yao and Li, Jianing and Wang, Zizhe and Liu, Yanwei and Ji, Xiangyang},
  title     = {Towards Understanding How Knowledge Evolves in Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29858--29868},
}
A Unified, Resilient, and Explainable Adversarial Patch Detector: Vishesh Kumar,

Akshay Agarwal; [pdf] [supp]
[bibtex]
@InProceedings{Kumar_2025_CVPR,
  author    = {Kumar, Vishesh and Agarwal, Akshay},
  title     = {A Unified, Resilient, and Explainable Adversarial Patch Detector},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30387--30397},
}
Structured 3D Latents for Scalable and Versatile 3D Generation: Jianfeng Xiang,

Zelong Lv,

Sicheng Xu,

Yu Deng,

Ruicheng Wang,

Bowen Zhang,

Dong Chen,

Xin Tong,

Jiaolong Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2025_CVPR,
  author    = {Xiang, Jianfeng and Lv, Zelong and Xu, Sicheng and Deng, Yu and Wang, Ruicheng and Zhang, Bowen and Chen, Dong and Tong, Xin and Yang, Jiaolong},
  title     = {Structured {3D} Latents for Scalable and Versatile {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21469--21480},
}
Self-Cross Diffusion Guidance for Text-to-Image Synthesis of Similar Subjects: Weimin Qiu,

Jieke Wang,

Meng Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2025_CVPR,
  author    = {Qiu, Weimin and Wang, Jieke and Tang, Meng},
  title     = {Self-Cross Diffusion Guidance for Text-to-Image Synthesis of Similar Subjects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23528--23538},
}
Adv-CPG: A Customized Portrait Generation Framework with Facial Adversarial Attacks: Junying Wang,

Hongyuan Zhang,

Yuan Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Junying and Zhang, Hongyuan and Yuan, Yuan},
  title     = {{Adv-CPG}: A Customized Portrait Generation Framework with Facial Adversarial Attacks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21001--21010},
}
Fish-Vista: A Multi-Purpose Dataset for Understanding & Identification of Traits from Images: Kazi Sajeed Mehrab,

M. Maruf,

Arka Daw,

Abhilash Neog,

Harish Babu Manogaran,

Mridul Khurana,

Zhenyang Feng,

Bahadir Altintas,

Yasin Bakis,

Elizabeth G Campolongo,

Matthew J Thompson,

Xiaojun Wang,

Hilmar Lapp,

Tanya Berger-Wolf,

Paula Mabee,

Henry Bart,

Wei-Lun Chao,

Wasila M Dahdul,

Anuj Karpatne; [pdf] [supp]
[bibtex]
@InProceedings{Mehrab_2025_CVPR,
  author    = {Mehrab, Kazi Sajeed and Maruf, M. and Daw, Arka and Neog, Abhilash and Manogaran, Harish Babu and Khurana, Mridul and Feng, Zhenyang and Altintas, Bahadir and Bakis, Yasin and Campolongo, Elizabeth G and Thompson, Matthew J and Wang, Xiaojun and Lapp, Hilmar and Berger-Wolf, Tanya and Mabee, Paula and Bart, Henry and Chao, Wei-Lun and Dahdul, Wasila M and Karpatne, Anuj},
  title     = {{Fish-Vista}: A Multi-Purpose Dataset for Understanding \& Identification of Traits from Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24275--24285},
}
PCM : Picard Consistency Model for Fast Parallel Sampling of Diffusion Models: Junhyuk So,

Jiwoong Shin,

Chaeyeon Jang,

Eunhyeok Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{So_2025_CVPR,
  author    = {So, Junhyuk and Shin, Jiwoong and Jang, Chaeyeon and Park, Eunhyeok},
  title     = {{PCM} : {Picard} Consistency Model for Fast Parallel Sampling of Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23313--23322},
}
CoMapGS: Covisibility Map-based Gaussian Splatting for Sparse Novel View Synthesis: Youngkyoon Jang,

Eduardo Pérez-Pellitero; [pdf] [supp]
[bibtex]
@InProceedings{Jang_2025_CVPR,
  author    = {Jang, Youngkyoon and P{\'e}rez-Pellitero, Eduardo},
  title     = {{CoMapGS}: Covisibility Map-based {Gaussian} Splatting for Sparse Novel View Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26779--26788},
}
Training Data Provenance Verification: Did Your Model Use Synthetic Data from My Generative Model for Training?: Yuechen Xie,

Jie Song,

Huiqiong Wang,

Mingli Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Yuechen and Song, Jie and Wang, Huiqiong and Song, Mingli},
  title     = {Training Data Provenance Verification: Did Your Model Use Synthetic Data from My Generative Model for Training?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23817--23827},
}
Improving the Training of Data-Efficient GANs via Quality Aware Dynamic Discriminator Rejection Sampling: Zhaoyu Zhang,

Yang Hua,

Guanxiong Sun,

Hui Wang,

Seán McLoone; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhaoyu and Hua, Yang and Sun, Guanxiong and Wang, Hui and McLoone, Se{\'a}n},
  title     = {Improving the Training of Data-Efficient {GANs} via Quality Aware Dynamic Discriminator Rejection Sampling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30682--30691},
}
MotionStone: Decoupled Motion Intensity Modulation with Diffusion Transformer for Image-to-Video Generation: Shuwei Shi,

Biao Gong,

Xi Chen,

Dandan Zheng,

Shuai Tan,

Zizheng Yang,

Yuyuan Li,

Jingwen He,

Kecheng Zheng,

Jingdong Chen,

Ming Yang,

Yinqiang Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Shuwei and Gong, Biao and Chen, Xi and Zheng, Dandan and Tan, Shuai and Yang, Zizheng and Li, Yuyuan and He, Jingwen and Zheng, Kecheng and Chen, Jingdong and Yang, Ming and Zheng, Yinqiang},
  title     = {{MotionStone}: Decoupled Motion Intensity Modulation with Diffusion Transformer for Image-to-Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22864--22874},
}
Advancing Generalizable Tumor Segmentation with Anomaly-Aware Open-Vocabulary Attention Maps and Frozen Foundation Diffusion Models: Yankai Jiang,

Peng Zhang,

Donglin Yang,

Yuan Tian,

Hai Lin,

Xiaosong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Yankai and Zhang, Peng and Yang, Donglin and Tian, Yuan and Lin, Hai and Wang, Xiaosong},
  title     = {Advancing Generalizable Tumor Segmentation with Anomaly-Aware Open-Vocabulary Attention Maps and Frozen Foundation Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25971--25981},
}
Towards Generalizable Scene Change Detection: Jae-Woo Kim,

Ue-Hwan Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Jae-Woo and Kim, Ue-Hwan},
  title     = {Towards Generalizable Scene Change Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24463--24473},
}
Incomplete Multi-modal Brain Tumor Segmentation via Learnable Sorting State Space Model: Zheyu Zhang,

Yayuan Lu,

Feipeng Ma,

Yueyi Zhang,

Huanjing Yue,

Xiaoyan Sun; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zheyu and Lu, Yayuan and Ma, Feipeng and Zhang, Yueyi and Yue, Huanjing and Sun, Xiaoyan},
  title     = {Incomplete Multi-modal Brain Tumor Segmentation via Learnable Sorting State Space Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25982--25992},
}
FedAWA: Adaptive Optimization of Aggregation Weights in Federated Learning Using Client Vectors: Changlong Shi,

He Zhao,

Bingjie Zhang,

Mingyuan Zhou,

Dandan Guo,

Yi Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Changlong and Zhao, He and Zhang, Bingjie and Zhou, Mingyuan and Guo, Dandan and Chang, Yi},
  title     = {{FedAWA}: Adaptive Optimization of Aggregation Weights in Federated Learning Using Client Vectors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30651--30660},
}
Rethinking Diffusion for Text-Driven Human Motion Generation: Redundant Representations, Evaluation, and Masked Autoregression: Zichong Meng,

Yiming Xie,

Xiaogang Peng,

Zeyu Han,

Huaizu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Meng_2025_CVPR,
  author    = {Meng, Zichong and Xie, Yiming and Peng, Xiaogang and Han, Zeyu and Jiang, Huaizu},
  title     = {Rethinking Diffusion for Text-Driven Human Motion Generation: Redundant Representations, Evaluation, and Masked Autoregression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27859--27871},
}
Stretching Each Dollar: Diffusion Training from Scratch on a Micro-Budget: Vikash Sehwag,

Xianghao Kong,

Jingtao Li,

Michael Spranger,

Lingjuan Lyu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sehwag_2025_CVPR,
  author    = {Sehwag, Vikash and Kong, Xianghao and Li, Jingtao and Spranger, Michael and Lyu, Lingjuan},
  title     = {Stretching Each Dollar: Diffusion Training from Scratch on a Micro-Budget},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28596--28608},
}
Guiding Human-Object Interactions with Rich Geometry and Relations: Mengqing Xue,

Yifei Liu,

Ling Guo,

Shaoli Huang,

Changxing Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2025_CVPR,
  author    = {Xue, Mengqing and Liu, Yifei and Guo, Ling and Huang, Shaoli and Ding, Changxing},
  title     = {Guiding Human-Object Interactions with Rich Geometry and Relations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22714--22723},
}
CADDreamer: CAD Object Generation from Single-view Images: Yuan Li,

Cheng Lin,

Yuan Liu,

Xiaoxiao Long,

Chenxu Zhang,

Ningna Wang,

Xin Li,

Wenping Wang,

Xiaohu Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yuan and Lin, Cheng and Liu, Yuan and Long, Xiaoxiao and Zhang, Chenxu and Wang, Ningna and Li, Xin and Wang, Wenping and Guo, Xiaohu},
  title     = {{CADDreamer}: {CAD} Object Generation from Single-view Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21448--21457},
}
Where's the Liability in the Generative Era? Recovery-based Black-Box Detection of AI-Generated Content: Haoyue Bai,

Yiyou Sun,

Wei Cheng,

Haifeng Chen; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2025_CVPR,
  author    = {Bai, Haoyue and Sun, Yiyou and Cheng, Wei and Chen, Haifeng},
  title     = {Where's the Liability in the Generative Era? Recovery-based Black-Box Detection of {AI}-Generated Content},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28821--28830},
}
DiTASK: Multi-Task Fine-Tuning with Diffeomorphic Transformations: Krishna Sri Ipsit Mantri,

Carola-Bibiane Schönlieb,

Bruno Ribeiro,

Chaim Baskin,

Moshe Eliasof; [pdf] [supp]
[bibtex]
@InProceedings{Mantri_2025_CVPR,
  author    = {Mantri, Krishna Sri Ipsit and Sch{\"o}nlieb, Carola-Bibiane and Ribeiro, Bruno and Baskin, Chaim and Eliasof, Moshe},
  title     = {{DiTASK}: Multi-Task Fine-Tuning with Diffeomorphic Transformations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25218--25229},
}
OW-OVD: Unified Open World and Open Vocabulary Object Detection: Xing Xi,

Yangyang Huang,

Ronghua Luo,

Yu Qiu; [pdf] [supp]
[bibtex]
@InProceedings{Xi_2025_CVPR,
  author    = {Xi, Xing and Huang, Yangyang and Luo, Ronghua and Qiu, Yu},
  title     = {{OW-OVD}: Unified Open World and Open Vocabulary Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25454--25464},
}
Improving Diffusion Inverse Problem Solving with Decoupled Noise Annealing: Bingliang Zhang,

Wenda Chu,

Julius Berner,

Chenlin Meng,

Anima Anandkumar,

Yang Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Bingliang and Chu, Wenda and Berner, Julius and Meng, Chenlin and Anandkumar, Anima and Song, Yang},
  title     = {Improving Diffusion Inverse Problem Solving with Decoupled Noise Annealing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20895--20905},
}
DesignDiffusion: High-Quality Text-to-Design Image Generation with Diffusion Models: Zhendong Wang,

Jianmin Bao,

Shuyang Gu,

Dong Chen,

Wengang Zhou,

Houqiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zhendong and Bao, Jianmin and Gu, Shuyang and Chen, Dong and Zhou, Wengang and Li, Houqiang},
  title     = {{DesignDiffusion}: High-Quality Text-to-Design Image Generation with Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20906--20915},
}
SAR3D: Autoregressive 3D Object Generation and Understanding via Multi-scale 3D VQVAE: Yongwei Chen,

Yushi Lan,

Shangchen Zhou,

Tengfei Wang,

Xingang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Yongwei and Lan, Yushi and Zhou, Shangchen and Wang, Tengfei and Pan, Xingang},
  title     = {{SAR3D}: Autoregressive {3D} Object Generation and Understanding via Multi-scale {3D} {VQVAE}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28371--28382},
}
Dual-Interrelated Diffusion Model for Few-Shot Anomaly Image Generation: Ying Jin,

Jinlong Peng,

Qingdong He,

Teng Hu,

Jiafu Wu,

Hao Chen,

Haoxuan Wang,

Wenbing Zhu,

Mingmin Chi,

Jun Liu,

Yabiao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2025_CVPR,
  author    = {Jin, Ying and Peng, Jinlong and He, Qingdong and Hu, Teng and Wu, Jiafu and Chen, Hao and Wang, Haoxuan and Zhu, Wenbing and Chi, Mingmin and Liu, Jun and Wang, Yabiao},
  title     = {Dual-Interrelated Diffusion Model for Few-Shot Anomaly Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30420--30429},
}
Interactive Medical Image Analysis with Concept-based Similarity Reasoning: Ta Duc Huy,

Sen Kim Tran,

Phan Nguyen,

Nguyen Hoang Tran,

Tran Bao Sam,

Anton van den Hengel,

Zhibin Liao,

Johan W. Verjans,

Minh-Son To,

Vu Minh Hieu Phan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huy_2025_CVPR,
  author    = {Huy, Ta Duc and Tran, Sen Kim and Nguyen, Phan and Tran, Nguyen Hoang and Sam, Tran Bao and van den Hengel, Anton and Liao, Zhibin and Verjans, Johan W. and To, Minh-Son and Phan, Vu Minh Hieu},
  title     = {Interactive Medical Image Analysis with Concept-based Similarity Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30797--30806},
}
h-Edit: Effective and Flexible Diffusion-Based Editing via Doob's h-Transform: Toan Nguyen,

Kien Do,

Duc Kieu,

Thin Nguyen; [pdf] [supp]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Toan and Do, Kien and Kieu, Duc and Nguyen, Thin},
  title     = {{h-Edit}: Effective and Flexible Diffusion-Based Editing via {Doob's} h-Transform},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28490--28501},
}
Are Spatial-Temporal Graph Convolution Networks for Human Action Recognition Over-Parameterized?: Jianyang Xie,

Yitian Zhao,

Yanda Meng,

He Zhao,

Anh Nguyen,

Yalin Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Jianyang and Zhao, Yitian and Meng, Yanda and Zhao, He and Nguyen, Anh and Zheng, Yalin},
  title     = {Are Spatial-Temporal Graph Convolution Networks for Human Action Recognition Over-Parameterized?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24309--24319},
}
Spectral State Space Model for Rotation-Invariant Visual Representation Learning: Sahar Dastani,

Ali Bahri,

Moslem Yazdanpanah,

Mehrdad Noori,

David Osowiechi,

Gustavo Adolfo Vargas Hakim,

Farzad Beizaee,

Milad Cheraghalikhani,

Arnab Kumar Mondal,

Herve Lombaert,

Christian Desrosiers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dastani_2025_CVPR,
  author    = {Dastani, Sahar and Bahri, Ali and Yazdanpanah, Moslem and Noori, Mehrdad and Osowiechi, David and Hakim, Gustavo Adolfo Vargas and Beizaee, Farzad and Cheraghalikhani, Milad and Mondal, Arnab Kumar and Lombaert, Herve and Desrosiers, Christian},
  title     = {Spectral State Space Model for Rotation-Invariant Visual Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23881--23890},
}
Sharp-It: A Multi-view to Multi-view Diffusion Model for 3D Synthesis and Manipulation: Yiftach Edelstein,

Or Patashnik,

Dana Cohen-Bar,

Lihi Zelnik-Manor; [pdf] [supp]
[bibtex]
@InProceedings{Edelstein_2025_CVPR,
  author    = {Edelstein, Yiftach and Patashnik, Or and Cohen-Bar, Dana and Zelnik-Manor, Lihi},
  title     = {{Sharp-It}: A Multi-view to Multi-view Diffusion Model for {3D} Synthesis and Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21458--21468},
}
URWKV: Unified RWKV Model with Multi-state Perspective for Low-light Image Restoration: Rui Xu,

Yuzhen Niu,

Yuezhou Li,

Huangbiao Xu,

Wenxi Liu,

Yuzhong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Rui and Niu, Yuzhen and Li, Yuezhou and Xu, Huangbiao and Liu, Wenxi and Chen, Yuzhong},
  title     = {{URWKV}: Unified {RWKV} Model with Multi-state Perspective for Low-light Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21267--21276},
}
Functionality Understanding and Segmentation in 3D Scenes: Jaime Corsetti,

Francesco Giuliari,

Alice Fasoli,

Davide Boscaini,

Fabio Poiesi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Corsetti_2025_CVPR,
  author    = {Corsetti, Jaime and Giuliari, Francesco and Fasoli, Alice and Boscaini, Davide and Poiesi, Fabio},
  title     = {Functionality Understanding and Segmentation in {3D} Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24550--24559},
}
Dragin3D: Image Editing by Dragging in 3D Space: Weiran Guang,

Xiaoguang Gu,

Mengqi Huang,

Zhendong Mao; [pdf] [supp]
[bibtex]
@InProceedings{Guang_2025_CVPR,
  author    = {Guang, Weiran and Gu, Xiaoguang and Huang, Mengqi and Mao, Zhendong},
  title     = {{Dragin3D}: Image Editing by Dragging in {3D} Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21502--21512},
}
Towards Stable and Storage-efficient Dataset Distillation: Matching Convexified Trajectory: Wenliang Zhong,

Haoyu Tang,

Qinghai Zheng,

Mingzhu Xu,

Yupeng Hu,

Weili Guan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_CVPR,
  author    = {Zhong, Wenliang and Tang, Haoyu and Zheng, Qinghai and Xu, Mingzhu and Hu, Yupeng and Guan, Weili},
  title     = {Towards Stable and Storage-efficient Dataset Distillation: Matching Convexified Trajectory},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25581--25589},
}
TSAM: Temporal SAM Augmented with Multimodal Prompts for Referring Audio-Visual Segmentation: Abduljalil Radman,

Jorma Laaksonen; [pdf] [supp]
[bibtex]
@InProceedings{Radman_2025_CVPR,
  author    = {Radman, Abduljalil and Laaksonen, Jorma},
  title     = {{TSAM}: Temporal {SAM} Augmented with Multimodal Prompts for Referring Audio-Visual Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23947--23956},
}
Invisible Backdoor Attack against Self-supervised Learning: Hanrong Zhang,

Zhenting Wang,

Boheng Li,

Fulin Lin,

Tingxu Han,

Mingyu Jin,

Chenlu Zhan,

Mengnan Du,

Hongwei Wang,

Shiqing Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Hanrong and Wang, Zhenting and Li, Boheng and Lin, Fulin and Han, Tingxu and Jin, Mingyu and Zhan, Chenlu and Du, Mengnan and Wang, Hongwei and Ma, Shiqing},
  title     = {Invisible Backdoor Attack against Self-supervised Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25790--25801},
}
Perceptually Accurate 3D Talking Head Generation: New Definitions, Speech-Mesh Representation, and Evaluation Metrics: Lee Chae-Yeon,

Oh Hyun-Bin,

Han EunGi,

Kim Sung-Bin,

Suekyeong Nam,

Tae-Hyun Oh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chae-Yeon_2025_CVPR,
  author    = {Lee, Chae-Yeon and Oh, Hyun-Bin and Han, EunGi and Kim, Sung-Bin and Nam, Suekyeong and Oh, Tae-Hyun},
  title     = {Perceptually Accurate {3D} Talking Head Generation: New Definitions, Speech-Mesh Representation, and Evaluation Metrics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21065--21074},
}
BWFormer: Building Wireframe Reconstruction from Airborne LiDAR Point Cloud with Transformer: Yuzhou Liu,

Lingjie Zhu,

Hanqiao Ye,

Shangfeng Huang,

Xiang Gao,

Xianwei Zheng,

Shuhan Shen; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yuzhou and Zhu, Lingjie and Ye, Hanqiao and Huang, Shangfeng and Gao, Xiang and Zheng, Xianwei and Shen, Shuhan},
  title     = {{BWFormer}: Building Wireframe Reconstruction from Airborne {LiDAR} Point Cloud with Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22215--22224},
}
Diffusion-4K: Ultra-High-Resolution Image Synthesis with Latent Diffusion Models: Jinjin Zhang,

Qiuyu Huang,

Junjie Liu,

Xiefan Guo,

Di Huang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jinjin and Huang, Qiuyu and Liu, Junjie and Guo, Xiefan and Huang, Di},
  title     = {{Diffusion-4K}: Ultra-High-Resolution Image Synthesis with Latent Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23464--23473},
}
OmniDrive: A Holistic Vision-Language Dataset for Autonomous Driving with Counterfactual Reasoning: Shihao Wang,

Zhiding Yu,

Xiaohui Jiang,

Shiyi Lan,

Min Shi,

Nadine Chang,

Jan Kautz,

Ying Li,

Jose M. Alvarez; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shihao and Yu, Zhiding and Jiang, Xiaohui and Lan, Shiyi and Shi, Min and Chang, Nadine and Kautz, Jan and Li, Ying and Alvarez, Jose M.},
  title     = {{OmniDrive}: A Holistic Vision-Language Dataset for Autonomous Driving with Counterfactual Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22442--22452},
}
MeGA: Hybrid Mesh-Gaussian Head Avatar for High-Fidelity Rendering and Head Editing: Cong Wang,

Di Kang,

Heyi Sun,

Shenhan Qian,

Zixuan Wang,

Linchao Bao,

Song-Hai Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Cong and Kang, Di and Sun, Heyi and Qian, Shenhan and Wang, Zixuan and Bao, Linchao and Zhang, Song-Hai},
  title     = {{MeGA}: Hybrid Mesh-{Gaussian} Head Avatar for High-Fidelity Rendering and Head Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26274--26284},
}
Comprehensive Information Bottleneck for Unveiling Universal Attribution to Interpret Vision Transformers: Jung-Ho Hong,

Ho-Joong Kim,

Kyu-Sung Jeon,

Seong-Whan Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2025_CVPR,
  author    = {Hong, Jung-Ho and Kim, Ho-Joong and Jeon, Kyu-Sung and Lee, Seong-Whan},
  title     = {Comprehensive Information Bottleneck for Unveiling Universal Attribution to Interpret Vision Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25166--25175},
}
Dataset Distillation with Neural Characteristic Function: A Minmax Perspective: Shaobo Wang,

Yicun Yang,

Zhiyuan Liu,

Chenghao Sun,

Xuming Hu,

Conghui He,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shaobo and Yang, Yicun and Liu, Zhiyuan and Sun, Chenghao and Hu, Xuming and He, Conghui and Zhang, Linfeng},
  title     = {Dataset Distillation with Neural Characteristic Function: A Minmax Perspective},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25570--25580},
}
Free-viewpoint Human Animation with Pose-correlated Reference Selection: Fa-Ting Hong,

Zhan Xu,

Haiyang Liu,

Qinjie Lin,

Luchuan Song,

Zhixin Shu,

Yang Zhou,

Duygu Ceylan,

Dan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2025_CVPR,
  author    = {Hong, Fa-Ting and Xu, Zhan and Liu, Haiyang and Lin, Qinjie and Song, Luchuan and Shu, Zhixin and Zhou, Yang and Ceylan, Duygu and Xu, Dan},
  title     = {Free-viewpoint Human Animation with Pose-correlated Reference Selection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26253--26262},
}
PillarHist: A Quantization-aware Pillar Feature Encoder based on Height-aware Histogram: Sifan Zhou,

Zhihang Yuan,

Dawei Yang,

Xing Hu,

Jian Qian,

Ziyu Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Sifan and Yuan, Zhihang and Yang, Dawei and Hu, Xing and Qian, Jian and Zhao, Ziyu},
  title     = {{PillarHist}: A Quantization-aware Pillar Feature Encoder based on Height-aware Histogram},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27336--27345},
}
Semantic and Expressive Variations in Image Captions Across Languages: Andre Ye,

Sebastin Santy,

Jena D. Hwang,

Amy X. Zhang,

Ranjay Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Andre and Santy, Sebastin and Hwang, Jena D. and Zhang, Amy X. and Krishna, Ranjay},
  title     = {Semantic and Expressive Variations in Image Captions Across Languages},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29667--29679},
}
ATP-LLaVA: Adaptive Token Pruning for Large Vision Language Models: Xubing Ye,

Yukang Gan,

Yixiao Ge,

Xiao-Ping Zhang,

Yansong Tang; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Xubing and Gan, Yukang and Ge, Yixiao and Zhang, Xiao-Ping and Tang, Yansong},
  title     = {{ATP-LLaVA}: Adaptive Token Pruning for Large Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24972--24982},
}
ADD: Attribution-Driven Data Augmentation Framework for Boosting Image Super-Resolution: Ze-Yu Mi,

Yu-Bin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Mi_2025_CVPR,
  author    = {Mi, Ze-Yu and Yang, Yu-Bin},
  title     = {{ADD}: Attribution-Driven Data Augmentation Framework for Boosting Image Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23101--23110},
}
CroCoDL: Cross-device Collaborative Dataset for Localization: Hermann Blum,

Alessandro Mercurio,

Joshua O'Reilly,

Tim Engelbracht,

Mihai Dusmanu,

Marc Pollefeys,

Zuria Bauer; [pdf] [supp]
[bibtex]
@InProceedings{Blum_2025_CVPR,
  author    = {Blum, Hermann and Mercurio, Alessandro and O'Reilly, Joshua and Engelbracht, Tim and Dusmanu, Mihai and Pollefeys, Marc and Bauer, Zuria},
  title     = {{CroCoDL}: Cross-device Collaborative Dataset for Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27424--27434},
}
CLIP is Almost All You Need: Towards Parameter-Efficient Scene Text Retrieval without OCR: Xugong Qin,

Peng Zhang,

Jun Jie Ou Yang,

Gangyan Zeng,

Yubo Li,

Yuanyuan Wang,

Wanqian Zhang,

Pengwen Dai; [pdf] [supp]
[bibtex]
@InProceedings{Qin_2025_CVPR,
  author    = {Qin, Xugong and Zhang, Peng and Yang, Jun Jie Ou and Zeng, Gangyan and Li, Yubo and Wang, Yuanyuan and Zhang, Wanqian and Dai, Pengwen},
  title     = {{CLIP} is Almost All You Need: Towards Parameter-Efficient Scene Text Retrieval without {OCR}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24873--24883},
}
What Makes a Good Dataset for Knowledge Distillation?: Logan Frank,

Jim Davis; [pdf] [arXiv]
[bibtex]
@InProceedings{Frank_2025_CVPR,
  author    = {Frank, Logan and Davis, Jim},
  title     = {What Makes a Good Dataset for Knowledge Distillation?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23755--23764},
}
Rectification-specific Supervision and Constrained Estimator for Online Stereo Rectification: Rui Gong,

Kim-Hui Yap,

Weide Liu,

Xulei Yang,

Jun Cheng; [pdf]
[bibtex]
@InProceedings{Gong_2025_CVPR,
  author    = {Gong, Rui and Yap, Kim-Hui and Liu, Weide and Yang, Xulei and Cheng, Jun},
  title     = {Rectification-specific Supervision and Constrained Estimator for Online Stereo Rectification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22348--22358},
}
Shape and Texture: What Influences Reliable Optical Flow Estimation?: Libo Long,

Xiao Hu,

Jochen Lang; [pdf] [supp]
[bibtex]
@InProceedings{Long_2025_CVPR,
  author    = {Long, Libo and Hu, Xiao and Lang, Jochen},
  title     = {Shape and Texture: What Influences Reliable Optical Flow Estimation?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27894--27903},
}
Precise, Fast, and Low-cost Concept Erasure in Value Space: Orthogonal Complement Matters: Yuan Wang,

Ouxiang Li,

Tingting Mu,

Yanbin Hao,

Kuien Liu,

Xiang Wang,

Xiangnan He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuan and Li, Ouxiang and Mu, Tingting and Hao, Yanbin and Liu, Kuien and Wang, Xiang and He, Xiangnan},
  title     = {Precise, Fast, and Low-cost Concept Erasure in Value Space: Orthogonal Complement Matters},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28759--28768},
}
HOIGen-1M: A Large-scale Dataset for Human-Object Interaction Video Generation: Kun Liu,

Qi Liu,

Xinchen Liu,

Jie Li,

Yongdong Zhang,

Jiebo Luo,

Xiaodong He,

Wu Liu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Kun and Liu, Qi and Liu, Xinchen and Li, Jie and Zhang, Yongdong and Luo, Jiebo and He, Xiaodong and Liu, Wu},
  title     = {{HOIGen-1M}: A Large-scale Dataset for Human-Object Interaction Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24001--24010},
}
Order-One Rolling Shutter Cameras: Marvin Anas Hahn,

Kathlén Kohn,

Orlando Marigliano,

Tomas Pajdla; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hahn_2025_CVPR,
  author    = {Hahn, Marvin Anas and Kohn, Kathl{\'e}n and Marigliano, Orlando and Pajdla, Tomas},
  title     = {Order-One Rolling Shutter Cameras},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27007--27016},
}
Animate and Sound an Image: Xihua Wang,

Ruihua Song,

Chongxuan Li,

Xin Cheng,

Boyuan Li,

Yihan Wu,

Yuyue Wang,

Hongteng Xu,

Yunfeng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xihua and Song, Ruihua and Li, Chongxuan and Cheng, Xin and Li, Boyuan and Wu, Yihan and Wang, Yuyue and Xu, Hongteng and Wang, Yunfeng},
  title     = {Animate and Sound an Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23369--23378},
}
Foveated Instance Segmentation: Hongyi Zeng,

Wenxuan Liu,

Tianhua Xia,

Jinhui Chen,

Ziyun Li,

Sai Qian Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_CVPR,
  author    = {Zeng, Hongyi and Liu, Wenxuan and Xia, Tianhua and Chen, Jinhui and Li, Ziyun and Zhang, Sai Qian},
  title     = {Foveated Instance Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24496--24505},
}
Emphasizing Discriminative Features for Dataset Distillation in Complex Scenarios: Kai Wang,

Zekai Li,

Zhi-Qi Cheng,

Samir Khaki,

Ahmad Sajedi,

Ramakrishna Vedantam,

Konstantinos N Plataniotis,

Alexander Hauptmann,

Yang You; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Kai and Li, Zekai and Cheng, Zhi-Qi and Khaki, Samir and Sajedi, Ahmad and Vedantam, Ramakrishna and Plataniotis, Konstantinos N and Hauptmann, Alexander and You, Yang},
  title     = {Emphasizing Discriminative Features for Dataset Distillation in Complex Scenarios},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30451--30461},
}
Segment This Thing: Foveated Tokenization for Efficient Point-Prompted Segmentation: Tanner Schmidt,

Richard Newcombe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schmidt_2025_CVPR,
  author    = {Schmidt, Tanner and Newcombe, Richard},
  title     = {Segment This Thing: Foveated Tokenization for Efficient Point-Prompted Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29428--29437},
}
Task-Specific Gradient Adaptation for Few-Shot One-Class Classification: Yunlong Li,

Xiabi Liu,

Liyuan Pan,

Yuchen Ren; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yunlong and Liu, Xiabi and Pan, Liyuan and Ren, Yuchen},
  title     = {Task-Specific Gradient Adaptation for Few-Shot One-Class Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30556--30565},
}
3D Gaussian Inpainting with Depth-Guided Cross-View Consistency: Sheng-Yu Huang,

Zi-Ting Chou,

Yu-Chiang Frank Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Sheng-Yu and Chou, Zi-Ting and Wang, Yu-Chiang Frank},
  title     = {{3D} {Gaussian} Inpainting with Depth-Guided Cross-View Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26704--26713},
}
Floxels: Fast Unsupervised Voxel Based Scene Flow Estimation: David T. Hoffmann,

Syed Haseeb Raza,

Hanqiu Jiang,

Denis Tananaev,

Steffen Klingenhoefer,

Martin Meinke; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hoffmann_2025_CVPR,
  author    = {Hoffmann, David T. and Raza, Syed Haseeb and Jiang, Hanqiu and Tananaev, Denis and Klingenhoefer, Steffen and Meinke, Martin},
  title     = {Floxels: Fast Unsupervised Voxel Based Scene Flow Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22328--22337},
}
LiveCC: Learning Video LLM with Streaming Speech Transcription at Scale: Joya Chen,

Ziyun Zeng,

Yiqi Lin,

Wei Li,

Zejun Ma,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Joya and Zeng, Ziyun and Lin, Yiqi and Li, Wei and Ma, Zejun and Shou, Mike Zheng},
  title     = {{LiveCC}: Learning Video {LLM} with Streaming Speech Transcription at Scale},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29083--29095},
}
FlexiDiT: Your Diffusion Transformer Can Easily Generate High-Quality Samples with Less Compute: Sotiris Anagnostidis,

Gregor Bachmann,

Yeongmin Kim,

Jonas Kohler,

Markos Georgopoulos,

Artsiom Sanakoyeu,

Yuming Du,

Albert Pumarola,

Ali Thabet,

Edgar Schönfeld; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Anagnostidis_2025_CVPR,
  author    = {Anagnostidis, Sotiris and Bachmann, Gregor and Kim, Yeongmin and Kohler, Jonas and Georgopoulos, Markos and Sanakoyeu, Artsiom and Du, Yuming and Pumarola, Albert and Thabet, Ali and Sch{\"o}nfeld, Edgar},
  title     = {{FlexiDiT}: Your Diffusion Transformer Can Easily Generate High-Quality Samples with Less Compute},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28316--28326},
}
HyperGLM: HyperGraph for Video Scene Graph Generation and Anticipation: Trong-Thuan Nguyen,

Pha Nguyen,

Jackson Cothren,

Alper Yilmaz,

Khoa Luu; [pdf] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Trong-Thuan and Nguyen, Pha and Cothren, Jackson and Yilmaz, Alper and Luu, Khoa},
  title     = {{HyperGLM}: {HyperGraph} for Video Scene Graph Generation and Anticipation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29150--29160},
}
FSFM: A Generalizable Face Security Foundation Model via Self-Supervised Facial Representation Learning: Gaojian Wang,

Feng Lin,

Tong Wu,

Zhenguang Liu,

Zhongjie Ba,

Kui Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Gaojian and Lin, Feng and Wu, Tong and Liu, Zhenguang and Ba, Zhongjie and Ren, Kui},
  title     = {{FSFM}: A Generalizable Face Security Foundation Model via Self-Supervised Facial Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24364--24376},
}
AlignMamba: Enhancing Multimodal Mamba with Local and Global Cross-modal Alignment: Yan Li,

Yifei Xing,

Xiangyuan Lan,

Xin Li,

Haifeng Chen,

Dongmei Jiang; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yan and Xing, Yifei and Lan, Xiangyuan and Li, Xin and Chen, Haifeng and Jiang, Dongmei},
  title     = {{AlignMamba}: Enhancing Multimodal {Mamba} with Local and Global Cross-modal Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24774--24784},
}
VideoComp: Advancing Fine-Grained Compositional and Temporal Alignment in Video-Text Models: Dahun Kim,

AJ Piergiovanni,

Ganesh Mallya,

Anelia Angelova; [pdf] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Dahun and Piergiovanni, AJ and Mallya, Ganesh and Angelova, Anelia},
  title     = {{VideoComp}: Advancing Fine-Grained Compositional and Temporal Alignment in Video-Text Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29060--29070},
}
One Model for ALL: Low-Level Task Interaction Is a Key to Task-Agnostic Image Fusion: Chunyang Cheng,

Tianyang Xu,

Zhenhua Feng,

Xiaojun Wu,

Zhangyong Tang,

Hui Li,

Zeyang Zhang,

Sara Atito,

Muhammad Awais,

Josef Kittler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Chunyang and Xu, Tianyang and Feng, Zhenhua and Wu, Xiaojun and Tang, Zhangyong and Li, Hui and Zhang, Zeyang and Atito, Sara and Awais, Muhammad and Kittler, Josef},
  title     = {One Model for {ALL}: Low-Level Task Interaction Is a Key to Task-Agnostic Image Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28102--28112},
}
Can Text-to-Video Generation help Video-Language Alignment?: Luca Zanella,

Massimiliano Mancini,

Willi Menapace,

Sergey Tulyakov,

Yiming Wang,

Elisa Ricci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zanella_2025_CVPR,
  author    = {Zanella, Luca and Mancini, Massimiliano and Menapace, Willi and Tulyakov, Sergey and Wang, Yiming and Ricci, Elisa},
  title     = {Can Text-to-Video Generation help Video-Language Alignment?},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24097--24107},
}
Weakly Supervised Contrastive Adversarial Training for Learning Robust Features from Semi-supervised Data: Lilin Zhang,

Chengpei Wu,

Ning Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Lilin and Wu, Chengpei and Yang, Ning},
  title     = {Weakly Supervised Contrastive Adversarial Training for Learning Robust Features from Semi-supervised Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25718--25727},
}
From Poses to Identity: Training-Free Person Re-Identification via Feature Centralization: Chao Yuan,

Guiwei Zhang,

Changxiao Ma,

Tianyi Zhang,

Guanglin Niu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Chao and Zhang, Guiwei and Ma, Changxiao and Zhang, Tianyi and Niu, Guanglin},
  title     = {From Poses to Identity: Training-Free Person Re-Identification via Feature Centralization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24409--24418},
}
MIMO: A Medical Vision Language Model with Visual Referring Multimodal Input and Pixel Grounding Multimodal Output: Yanyuan Chen,

Dexuan Xu,

Yu Huang,

Songkun Zhan,

Hanpin Wang,

Dongxue Chen,

Xueping Wang,

Meikang Qiu,

Hang Li; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Yanyuan and Xu, Dexuan and Huang, Yu and Zhan, Songkun and Wang, Hanpin and Chen, Dongxue and Wang, Xueping and Qiu, Meikang and Li, Hang},
  title     = {{MIMO}: A Medical Vision Language Model with Visual Referring Multimodal Input and Pixel Grounding Multimodal Output},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24732--24741},
}
Bias for Action: Video Implicit Neural Representations with Bias Modulation: Alper Kayabasi,

Anil Kumar Vadathya,

Guha Balakrishnan,

Vishwanath Saragadam; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kayabasi_2025_CVPR,
  author    = {Kayabasi, Alper and Vadathya, Anil Kumar and Balakrishnan, Guha and Saragadam, Vishwanath},
  title     = {Bias for Action: Video Implicit Neural Representations with Bias Modulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27999--28008},
}
Segment Anything, Even Occluded: Wei-En Tai,

Yu-Lin Shih,

Cheng Sun,

Yu-Chiang Frank Wang,

Hwann-Tzong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tai_2025_CVPR,
  author    = {Tai, Wei-En and Shih, Yu-Lin and Sun, Cheng and Wang, Yu-Chiang Frank and Chen, Hwann-Tzong},
  title     = {Segment Anything, Even Occluded},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29385--29394},
}
LOGICZSL: Exploring Logic-induced Representation for Compositional Zero-shot Learning: Peng Wu,

Xiankai Lu,

Hao Hu,

Yongqin Xian,

Jianbing Shen,

Wenguan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Peng and Lu, Xiankai and Hu, Hao and Xian, Yongqin and Shen, Jianbing and Wang, Wenguan},
  title     = {{LOGICZSL}: Exploring Logic-induced Representation for Compositional Zero-shot Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30301--30311},
}
Universal Actions for Enhanced Embodied Foundation Models: Jinliang Zheng,

Jianxiong Li,

Dongxiu Liu,

Yinan Zheng,

Zhihao Wang,

Zhonghong Ou,

Yu Liu,

Jingjing Liu,

Ya-Qin Zhang,

Xianyuan Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Jinliang and Li, Jianxiong and Liu, Dongxiu and Zheng, Yinan and Wang, Zhihao and Ou, Zhonghong and Liu, Yu and Liu, Jingjing and Zhang, Ya-Qin and Zhan, Xianyuan},
  title     = {Universal Actions for Enhanced Embodied Foundation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22508--22519},
}
FaithDiff: Unleashing Diffusion Priors for Faithful Image Super-resolution: Junyang Chen,

Jinshan Pan,

Jiangxin Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Junyang and Pan, Jinshan and Dong, Jiangxin},
  title     = {{FaithDiff}: Unleashing Diffusion Priors for Faithful Image Super-resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28188--28197},
}
Scene-agnostic Pose Regression for Visual Localization: Junwei Zheng,

Ruiping Liu,

Yufan Chen,

Zhenfang Chen,

Kailun Yang,

Jiaming Zhang,

Rainer Stiefelhagen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Junwei and Liu, Ruiping and Chen, Yufan and Chen, Zhenfang and Yang, Kailun and Zhang, Jiaming and Stiefelhagen, Rainer},
  title     = {Scene-agnostic Pose Regression for Visual Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27092--27102},
}
Divide and Conquer: Heterogeneous Noise Integration for Diffusion-based Adversarial Purification: Gaozheng Pei,

Shaojie Lyu,

Gong Chen,

Ke Ma,

Qianqian Xu,

Yingfei Sun,

Qingming Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Pei_2025_CVPR,
  author    = {Pei, Gaozheng and Lyu, Shaojie and Chen, Gong and Ma, Ke and Xu, Qianqian and Sun, Yingfei and Huang, Qingming},
  title     = {Divide and Conquer: Heterogeneous Noise Integration for Diffusion-based Adversarial Purification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29268--29277},
}
SEC-Prompt:SEmantic Complementary Prompting for Few-Shot Class-Incremental Learning: Ye Liu,

Meng Yang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Ye and Yang, Meng},
  title     = {{SEC-Prompt}:{SEmantic} Complementary Prompting for Few-Shot Class-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25643--25656},
}
LiMoE: Mixture of LiDAR Representation Learners from Automotive Scenes: Xiang Xu,

Lingdong Kong,

Hui Shuai,

Liang Pan,

Ziwei Liu,

Qingshan Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Xiang and Kong, Lingdong and Shuai, Hui and Pan, Liang and Liu, Ziwei and Liu, Qingshan},
  title     = {{LiMoE}: Mixture of {LiDAR} Representation Learners from Automotive Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27368--27379},
}
PI-HMR: Towards Robust In-bed Temporal Human Shape Reconstruction with Contact Pressure Sensing: Ziyu Wu,

Yufan Xiong,

Mengting Niu,

Fangting Xie,

Quan Wan,

Qijun Ying,

Boyan Liu,

Xiaohui Cai; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Ziyu and Xiong, Yufan and Niu, Mengting and Xie, Fangting and Wan, Quan and Ying, Qijun and Liu, Boyan and Cai, Xiaohui},
  title     = {{PI-HMR}: Towards Robust In-bed Temporal Human Shape Reconstruction with Contact Pressure Sensing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27739--27749},
}
CheckManual: A New Challenge and Benchmark for Manual-based Appliance Manipulation: Yuxing Long,

Jiyao Zhang,

Mingjie Pan,

Tianshu Wu,

Taewhan Kim,

Hao Dong; [pdf] [arXiv]
[bibtex]
@InProceedings{Long_2025_CVPR,
  author    = {Long, Yuxing and Zhang, Jiyao and Pan, Mingjie and Wu, Tianshu and Kim, Taewhan and Dong, Hao},
  title     = {{CheckManual}: A New Challenge and Benchmark for Manual-based Appliance Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22595--22604},
}
SEEN-DA: SEmantic ENtropy guided Domain-aware Attention for Domain Adaptive Object Detection: Haochen Li,

Rui Zhang,

Hantao Yao,

Xin Zhang,

Yifan Hao,

Xinkai Song,

Shaohui Peng,

Yongwei Zhao,

Chen Zhao,

Yanjun Wu,

Ling Li; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Haochen and Zhang, Rui and Yao, Hantao and Zhang, Xin and Hao, Yifan and Song, Xinkai and Peng, Shaohui and Zhao, Yongwei and Zhao, Chen and Wu, Yanjun and Li, Ling},
  title     = {{SEEN-DA}: {SEmantic} {ENtropy} guided Domain-aware Attention for Domain Adaptive Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25465--25475},
}
Blind Bitstream-corrupted Video Recovery via Metadata-guided Diffusion Model: Shuyun Wang,

Hu Zhang,

Xin Shen,

Dadong Wang,

Xin Yu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shuyun and Zhang, Hu and Shen, Xin and Wang, Dadong and Yu, Xin},
  title     = {Blind Bitstream-corrupted Video Recovery via Metadata-guided Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22975--22984},
}
Mind the Trojan Horse: Image Prompt Adapter Enabling Scalable and Deceptive Jailbreaking: Junxi Chen,

Junhao Dong,

Xiaohua Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Junxi and Dong, Junhao and Xie, Xiaohua},
  title     = {Mind the {Trojan} Horse: Image Prompt Adapter Enabling Scalable and Deceptive Jailbreaking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23785--23794},
}
GEM: A Generalizable Ego-Vision Multimodal World Model for Fine-Grained Ego-Motion, Object Dynamics, and Scene Composition Control: Mariam Hassan,

Sebastian Stapf,

Ahmad Rahimi,

Pedro M B Rezende,

Yasaman Haghighi,

David Brüggemann,

Isinsu Katircioglu,

Lin Zhang,

Xiaoran Chen,

Suman Saha,

Marco Cannici,

Elie Aljalbout,

Botao Ye,

Xi Wang,

Aram Davtyan,

Mathieu Salzmann,

Davide Scaramuzza,

Marc Pollefeys,

Paolo Favaro,

Alexandre Alahi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hassan_2025_CVPR,
  author    = {Hassan, Mariam and Stapf, Sebastian and Rahimi, Ahmad and Rezende, Pedro M B and Haghighi, Yasaman and Br{\"u}ggemann, David and Katircioglu, Isinsu and Zhang, Lin and Chen, Xiaoran and Saha, Suman and Cannici, Marco and Aljalbout, Elie and Ye, Botao and Wang, Xi and Davtyan, Aram and Salzmann, Mathieu and Scaramuzza, Davide and Pollefeys, Marc and Favaro, Paolo and Alahi, Alexandre},
  title     = {{GEM}: A Generalizable Ego-Vision Multimodal World Model for Fine-Grained Ego-Motion, Object Dynamics, and Scene Composition Control},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22404--22415},
}
Scene-Centric Unsupervised Panoptic Segmentation: Oliver Hahn,

Christoph Reich,

Nikita Araslanov,

Daniel Cremers,

Christian Rupprecht,

Stefan Roth; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hahn_2025_CVPR,
  author    = {Hahn, Oliver and Reich, Christoph and Araslanov, Nikita and Cremers, Daniel and Rupprecht, Christian and Roth, Stefan},
  title     = {Scene-Centric Unsupervised Panoptic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24485--24495},
}
Learning Physics From Video: Unsupervised Physical Parameter Estimation for Continuous Dynamical Systems: Alejandro Castañeda Garcia,

Jan Warchocki,

Jan van Gemert,

Daan Brinks,

Nergis Tomen; [pdf] [supp]
[bibtex]
@InProceedings{Garcia_2025_CVPR,
  author    = {Garcia, Alejandro Casta{\~n}eda and Warchocki, Jan and van Gemert, Jan and Brinks, Daan and Tomen, Nergis},
  title     = {Learning Physics From Video: Unsupervised Physical Parameter Estimation for Continuous Dynamical Systems},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27924--27933},
}
ProAPO: Progressively Automatic Prompt Optimization for Visual Classification: Xiangyan Qu,

Gaopeng Gou,

Jiamin Zhuang,

Jing Yu,

Kun Song,

Qihao Wang,

Yili Li,

Gang Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_CVPR,
  author    = {Qu, Xiangyan and Gou, Gaopeng and Zhuang, Jiamin and Yu, Jing and Song, Kun and Wang, Qihao and Li, Yili and Xiong, Gang},
  title     = {{ProAPO}: Progressively Automatic Prompt Optimization for Visual Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25145--25155},
}
Black Swan: Abductive and Defeasible Video Reasoning in Unpredictable Events: Aditya Chinchure,

Sahithya Ravi,

Raymond Ng,

Vered Shwartz,

Boyang Li,

Leonid Sigal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chinchure_2025_CVPR,
  author    = {Chinchure, Aditya and Ravi, Sahithya and Ng, Raymond and Shwartz, Vered and Li, Boyang and Sigal, Leonid},
  title     = {Black Swan: Abductive and Defeasible Video Reasoning in Unpredictable Events},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24201--24210},
}
RNG: Relightable Neural Gaussians: Jiahui Fan,

Fujun Luan,

Jian Yang,

Milos Hasan,

Beibei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Jiahui and Luan, Fujun and Yang, Jian and Hasan, Milos and Wang, Beibei},
  title     = {{RNG}: Relightable Neural {Gaussians}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26525--26534},
}
Towards Realistic Example-based Modeling via 3D Gaussian Stitching: Xinyu Gao,

Ziyi Yang,

Bingchen Gong,

Xiaoguang Han,

Sipeng Yang,

Xiaogang Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Xinyu and Yang, Ziyi and Gong, Bingchen and Han, Xiaoguang and Yang, Sipeng and Jin, Xiaogang},
  title     = {Towards Realistic Example-based Modeling via {3D} {Gaussian} Stitching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26597--26607},
}
Generative Sparse-View Gaussian Splatting: Hanyang Kong,

Xingyi Yang,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Kong_2025_CVPR,
  author    = {Kong, Hanyang and Yang, Xingyi and Wang, Xinchao},
  title     = {Generative Sparse-View {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26745--26755},
}
Generative Inbetweening through Frame-wise Conditions-Driven Video Generation: Tianyi Zhu,

Dongwei Ren,

Qilong Wang,

Xiaohe Wu,

Wangmeng Zuo; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Tianyi and Ren, Dongwei and Wang, Qilong and Wu, Xiaohe and Zuo, Wangmeng},
  title     = {Generative Inbetweening through Frame-wise Conditions-Driven Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27968--27978},
}
DexGrasp Anything: Towards Universal Robotic Dexterous Grasping with Physics Awareness: Yiming Zhong,

Qi Jiang,

Jingyi Yu,

Yuexin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_CVPR,
  author    = {Zhong, Yiming and Jiang, Qi and Yu, Jingyi and Ma, Yuexin},
  title     = {{DexGrasp} Anything: Towards Universal Robotic Dexterous Grasping with Physics Awareness},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22584--22594},
}
CustAny: Customizing Anything from A Single Example: Lingjie Kong,

Kai Wu,

Chengming Xu,

Xiaobin Hu,

Wenhui Han,

Jinlong Peng,

Donghao Luo,

Mengtian Li,

Jiangning Zhang,

Chengjie Wang,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2025_CVPR,
  author    = {Kong, Lingjie and Wu, Kai and Xu, Chengming and Hu, Xiaobin and Han, Wenhui and Peng, Jinlong and Luo, Donghao and Li, Mengtian and Zhang, Jiangning and Wang, Chengjie and Fu, Yanwei},
  title     = {{CustAny}: Customizing Anything from A Single Example},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20916--20925},
}
PoseTraj: Pose-Aware Trajectory Control in Video Diffusion: Longbin Ji,

Lei Zhong,

Pengfei Wei,

Changjian Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2025_CVPR,
  author    = {Ji, Longbin and Zhong, Lei and Wei, Pengfei and Li, Changjian},
  title     = {{PoseTraj}: Pose-Aware Trajectory Control in Video Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22776--22785},
}
VL2Lite: Task-Specific Knowledge Distillation from Large Vision-Language Models to Lightweight Networks: Jinseong Jang,

Chunfei Ma,

Byeongwon Lee; [pdf]
[bibtex]
@InProceedings{Jang_2025_CVPR,
  author    = {Jang, Jinseong and Ma, Chunfei and Lee, Byeongwon},
  title     = {{VL2Lite}: Task-Specific Knowledge Distillation from Large Vision-Language Models to Lightweight Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30073--30083},
}
StageDesigner: Artistic Stage Generation for Scenography via Theater Scripts: Zhaoxing Gan,

Mengtian Li,

Ruhua Chen,

Zhongxia Ji,

Sichen Guo,

Huanling Hu,

Guangnan Ye,

Zuo Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gan_2025_CVPR,
  author    = {Gan, Zhaoxing and Li, Mengtian and Chen, Ruhua and Ji, Zhongxia and Guo, Sichen and Hu, Huanling and Ye, Guangnan and Hu, Zuo},
  title     = {{StageDesigner}: Artistic Stage Generation for Scenography via Theater Scripts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28705--28714},
}
Interpreting Object-level Foundation Models via Visual Precision Search: Ruoyu Chen,

Siyuan Liang,

Jingzhi Li,

Shiming Liu,

Maosen Li,

Zhen Huang,

Hua Zhang,

Xiaochun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Ruoyu and Liang, Siyuan and Li, Jingzhi and Liu, Shiming and Li, Maosen and Huang, Zhen and Zhang, Hua and Cao, Xiaochun},
  title     = {Interpreting Object-level Foundation Models via Visual Precision Search},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30042--30052},
}
Foley-Flow: Coordinated Video-to-Audio Generation with Masked Audio-Visual Alignment and Dynamic Conditional Flows: Shentong Mo,

Yibing Song; [pdf]
[bibtex]
@InProceedings{Mo_2025_CVPR,
  author    = {Mo, Shentong and Song, Yibing},
  title     = {{Foley-Flow}: Coordinated Video-to-Audio Generation with Masked Audio-Visual Alignment and Dynamic Conditional Flows},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28912--28921},
}
All-directional Disparity Estimation for Real-world QPD Images: Hongtao Yu,

Shaohui Song,

Lihu Sun,

Wenkai Su,

Xiaodong Yang,

Chengming Liu; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Hongtao and Song, Shaohui and Sun, Lihu and Su, Wenkai and Yang, Xiaodong and Liu, Chengming},
  title     = {All-directional Disparity Estimation for Real-world {QPD} Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21836--21846},
}
Using Diffusion Priors for Video Amodal Segmentation: Kaihua Chen,

Deva Ramanan,

Tarasha Khurana; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Kaihua and Ramanan, Deva and Khurana, Tarasha},
  title     = {Using Diffusion Priors for Video Amodal Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22890--22900},
}
Dyn-HaMR: Recovering 4D Interacting Hand Motion from a Dynamic Camera: Zhengdi Yu,

Stefanos Zafeiriou,

Tolga Birdal; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Zhengdi and Zafeiriou, Stefanos and Birdal, Tolga},
  title     = {{Dyn-HaMR}: Recovering {4D} Interacting Hand Motion from a Dynamic Camera},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27716--27726},
}
The Scene Language: Representing Scenes with Programs, Words, and Embeddings: Yunzhi Zhang,

Zizhang Li,

Matt Zhou,

Shangzhe Wu,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yunzhi and Li, Zizhang and Zhou, Matt and Wu, Shangzhe and Wu, Jiajun},
  title     = {The Scene Language: Representing Scenes with Programs, Words, and Embeddings},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24625--24634},
}
Learning Physics-Based Full-Body Human Reaching and Grasping from Brief Walking References: Yitang Li,

Mingxian Lin,

Zhuo Lin,

Yipeng Deng,

Yue Cao,

Li Yi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yitang and Lin, Mingxian and Lin, Zhuo and Deng, Yipeng and Cao, Yue and Yi, Li},
  title     = {Learning Physics-Based Full-Body Human Reaching and Grasping from Brief Walking References},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27673--27682},
}
EmoEdit: Evoking Emotions through Image Manipulation: Jingyuan Yang,

Jiawei Feng,

Weibin Luo,

Dani Lischinski,

Daniel Cohen-Or,

Hui Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jingyuan and Feng, Jiawei and Luo, Weibin and Lischinski, Dani and Cohen-Or, Daniel and Huang, Hui},
  title     = {{EmoEdit}: Evoking Emotions through Image Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24690--24699},
}
SparseAlign: a Fully Sparse Framework for Cooperative Object Detection: Yunshuang Yuan,

Yan Xia,

Daniel Cremers,

Monika Sester; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Yunshuang and Xia, Yan and Cremers, Daniel and Sester, Monika},
  title     = {{SparseAlign}: a Fully Sparse Framework for Cooperative Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22296--22305},
}
Data Distributional Properties As Inductive Bias for Systematic Generalization: Felipe del Rio,

Alain Raymond-Saez,

Daniel Florea,

Rodrigo Toro Icarte,

Julio Hurtado,

Cristian B. Calderon,

Alvaro Soto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{del_Rio_2025_CVPR,
  author    = {del Rio, Felipe and Raymond-Saez, Alain and Florea, Daniel and Toro Icarte, Rodrigo and Hurtado, Julio and Calderon, Cristian B. and Soto, Alvaro},
  title     = {Data Distributional Properties As Inductive Bias for Systematic Generalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25590--25601},
}
TopoCellGen: Generating Histopathology Cell Topology with a Diffusion Model: Meilong Xu,

Saumya Gupta,

Xiaoling Hu,

Chen Li,

Shahira Abousamra,

Dimitris Samaras,

Prateek Prasanna,

Chao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Meilong and Gupta, Saumya and Hu, Xiaoling and Li, Chen and Abousamra, Shahira and Samaras, Dimitris and Prasanna, Prateek and Chen, Chao},
  title     = {{TopoCellGen}: Generating Histopathology Cell Topology with a Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20979--20989},
}
Meta-Learning Hyperparameters for Parameter Efficient Fine-Tuning: Zichen Tian,

Yaoyao Liu,

Qianru Sun; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Zichen and Liu, Yaoyao and Sun, Qianru},
  title     = {Meta-Learning Hyperparameters for Parameter Efficient Fine-Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23037--23047},
}
TriTex: Learning Texture from a Single Mesh via Triplane Semantic Features: Dana Cohen-Bar,

Daniel Cohen-Or,

Gal Chechik,

Yoni Kasten; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cohen-Bar_2025_CVPR,
  author    = {Cohen-Bar, Dana and Cohen-Or, Daniel and Chechik, Gal and Kasten, Yoni},
  title     = {{TriTex}: Learning Texture from a Single Mesh via Triplane Semantic Features},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21403--21413},
}
Wavelet and Prototype Augmented Query-based Transformer for Pixel-level Surface Defect Detection: Feng Yan,

Xiaoheng Jiang,

Yang Lu,

Jiale Cao,

Dong Chen,

Mingliang Xu; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Feng and Jiang, Xiaoheng and Lu, Yang and Cao, Jiale and Chen, Dong and Xu, Mingliang},
  title     = {Wavelet and Prototype Augmented Query-based Transformer for Pixel-level Surface Defect Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23860--23869},
}
Alignment, Mining and Fusion: Representation Alignment with Hard Negative Mining and Selective Knowledge Fusion for Medical Visual Question Answering: Yuanhao Zou,

Zhaozheng Yin; [pdf] [supp]
[bibtex]
@InProceedings{Zou_2025_CVPR,
  author    = {Zou, Yuanhao and Yin, Zhaozheng},
  title     = {Alignment, Mining and Fusion: Representation Alignment with Hard Negative Mining and Selective Knowledge Fusion for Medical Visual Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29623--29633},
}
Language-Guided Audio-Visual Learning for Long-Term Sports Assessment: Huangbiao Xu,

Xiao Ke,

Huanqi Wu,

Rui Xu,

Yuezhou Li,

Wenzhong Guo; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Huangbiao and Ke, Xiao and Wu, Huanqi and Xu, Rui and Li, Yuezhou and Guo, Wenzhong},
  title     = {Language-Guided Audio-Visual Learning for Long-Term Sports Assessment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23967--23977},
}
Teller: Real-Time Streaming Audio-Driven Portrait Animation with Autoregressive Motion Generation: Dingcheng Zhen,

Shunshun Yin,

Shiyang Qin,

Hou Yi,

Ziwei Zhang,

Siyuan Liu,

Gan Qi,

Ming Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhen_2025_CVPR,
  author    = {Zhen, Dingcheng and Yin, Shunshun and Qin, Shiyang and Yi, Hou and Zhang, Ziwei and Liu, Siyuan and Qi, Gan and Tao, Ming},
  title     = {Teller: Real-Time Streaming Audio-Driven Portrait Animation with Autoregressive Motion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21075--21085},
}
PersonaHOI: Effortlessly Improving Face Personalization in Human-Object Interaction Generation: Xinting Hu,

Haoran Wang,

Jan Eric Lenssen,

Bernt Schiele; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Xinting and Wang, Haoran and Lenssen, Jan Eric and Schiele, Bernt},
  title     = {{PersonaHOI}: Effortlessly Improving Face Personalization in Human-Object Interaction Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23775--23784},
}
Video-Panda: Parameter-efficient Alignment for Encoder-free Video-Language Models: Jinhui Yi,

Syed Talal Wasim,

Yanan Luo,

Muzammal Naseer,

Juergen Gall; [pdf] [supp]
[bibtex]
@InProceedings{Yi_2025_CVPR,
  author    = {Yi, Jinhui and Wasim, Syed Talal and Luo, Yanan and Naseer, Muzammal and Gall, Juergen},
  title     = {{Video-Panda}: Parameter-efficient Alignment for Encoder-free Video-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24119--24128},
}
MonoSplat: Generalizable 3D Gaussian Splatting from Monocular Depth Foundation Models: Yifan Liu,

Keyu Fan,

Weihao Yu,

Chenxin Li,

Hao Lu,

Yixuan Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yifan and Fan, Keyu and Yu, Weihao and Li, Chenxin and Lu, Hao and Yuan, Yixuan},
  title     = {{MonoSplat}: Generalizable {3D} {Gaussian} Splatting from Monocular Depth Foundation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21570--21579},
}
Hybrid Global-Local Representation with Augmented Spatial Guidance for Zero-Shot Referring Image Segmentation: Ting Liu,

Siyuan Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Ting and Li, Siyuan},
  title     = {Hybrid Global-Local Representation with Augmented Spatial Guidance for Zero-Shot Referring Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29634--29643},
}
Probability Density Geodesics in Image Diffusion Latent Space: Qingtao Yu,

Jaskirat Singh,

Zhaoyuan Yang,

Peter Henry Tu,

Jing Zhang,

Hongdong Li,

Richard Hartley,

Dylan Campbell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Qingtao and Singh, Jaskirat and Yang, Zhaoyuan and Tu, Peter Henry and Zhang, Jing and Li, Hongdong and Hartley, Richard and Campbell, Dylan},
  title     = {Probability Density Geodesics in Image Diffusion Latent Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27989--27998},
}
EgoLife: Towards Egocentric Life Assistant: Jingkang Yang,

Shuai Liu,

Hongming Guo,

Yuhao Dong,

Xiamengwei Zhang,

Sicheng Zhang,

Pengyun Wang,

Zitang Zhou,

Binzhu Xie,

Ziyue Wang,

Bei Ouyang,

Zhengyu Lin,

Marco Cominelli,

Zhongang Cai,

Bo Li,

Yuanhan Zhang,

Peiyuan Zhang,

Fangzhou Hong,

Joerg Widmer,

Francesco Gringoli,

Lei Yang,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jingkang and Liu, Shuai and Guo, Hongming and Dong, Yuhao and Zhang, Xiamengwei and Zhang, Sicheng and Wang, Pengyun and Zhou, Zitang and Xie, Binzhu and Wang, Ziyue and Ouyang, Bei and Lin, Zhengyu and Cominelli, Marco and Cai, Zhongang and Li, Bo and Zhang, Yuanhan and Zhang, Peiyuan and Hong, Fangzhou and Widmer, Joerg and Gringoli, Francesco and Yang, Lei and Liu, Ziwei},
  title     = {{EgoLife}: Towards Egocentric Life Assistant},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28885--28900},
}
BrepGiff: Lightweight Generation of Complex B-rep with 3D GAT Diffusion: Hao Guo,

Xiaoshui Huang,

Hao jiacheng,

Yunpeng Bai,

Hongping Gan,

Yilei Shi; [pdf]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Hao and Huang, Xiaoshui and jiacheng, Hao and Bai, Yunpeng and Gan, Hongping and Shi, Yilei},
  title     = {{BrepGiff}: Lightweight Generation of Complex {B-rep} with {3D} {GAT} Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26587--26596},
}
Towards Fine-Grained Interpretability: Counterfactual Explanations for Misclassification with Saliency Partition: Lintong Zhang,

Kang Yin,

Seong-Whan Lee; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Lintong and Yin, Kang and Lee, Seong-Whan},
  title     = {Towards Fine-Grained Interpretability: Counterfactual Explanations for Misclassification with Saliency Partition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30053--30062},
}
Joint Scheduling of Causal Prompts and Tasks for Multi-Task Learning: Chaoyang Li,

Jianyang Qin,

Jinhao Cui,

Zeyu Liu,

Ning Hu,

Qing Liao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Chaoyang and Qin, Jianyang and Cui, Jinhao and Liu, Zeyu and Hu, Ning and Liao, Qing},
  title     = {Joint Scheduling of Causal Prompts and Tasks for Multi-Task Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25124--25134},
}
3DGUT: Enabling Distorted Cameras and Secondary Rays in Gaussian Splatting: Qi Wu,

Janick Martinez Esturo,

Ashkan Mirzaei,

Nicolas Moënne-Loccoz,

Zan Gojcic; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Qi and Martinez Esturo, Janick and Mirzaei, Ashkan and Mo{\"e}nne-Loccoz, Nicolas and Gojcic, Zan},
  title     = {{3DGUT}: Enabling Distorted Cameras and Secondary Rays in {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26036--26046},
}
It's a (Blind) Match! Towards Vision-Language Correspondence without Parallel Data: Dominik Schnaus,

Nikita Araslanov,

Daniel Cremers; [pdf] [supp]
[bibtex]
@InProceedings{Schnaus_2025_CVPR,
  author    = {Schnaus, Dominik and Araslanov, Nikita and Cremers, Daniel},
  title     = {It's a (Blind) Match! {Towards} Vision-Language Correspondence without Parallel Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24983--24992},
}
Open Set Label Shift with Test Time Out-of-Distribution Reference: Changkun Ye,

Russell Tsuchida,

Lars Petersson,

Nick Barnes; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Changkun and Tsuchida, Russell and Petersson, Lars and Barnes, Nick},
  title     = {Open Set Label Shift with Test Time Out-of-Distribution Reference},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30619--30629},
}
GaussianFormer-2: Probabilistic Gaussian Superposition for Efficient 3D Occupancy Prediction: Yuanhui Huang,

Amonnut Thammatadatrakoon,

Wenzhao Zheng,

Yunpeng Zhang,

Dalong Du,

Jiwen Lu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Yuanhui and Thammatadatrakoon, Amonnut and Zheng, Wenzhao and Zhang, Yunpeng and Du, Dalong and Lu, Jiwen},
  title     = {{GaussianFormer-2}: Probabilistic {Gaussian} Superposition for Efficient {3D} Occupancy Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27477--27486},
}
Flexible Frame Selection for Efficient Video Reasoning: Shyamal Buch,

Arsha Nagrani,

Anurag Arnab,

Cordelia Schmid; [pdf] [supp]
[bibtex]
@InProceedings{Buch_2025_CVPR,
  author    = {Buch, Shyamal and Nagrani, Arsha and Arnab, Anurag and Schmid, Cordelia},
  title     = {Flexible Frame Selection for Efficient Video Reasoning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29071--29082},
}
EventGPT: Event Stream Understanding with Multimodal Large Language Models: Shaoyu Liu,

Jianing Li,

Guanghui Zhao,

Yunjian Zhang,

Xin Meng,

Fei Richard Yu,

Xiangyang Ji,

Ming Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Shaoyu and Li, Jianing and Zhao, Guanghui and Zhang, Yunjian and Meng, Xin and Yu, Fei Richard and Ji, Xiangyang and Li, Ming},
  title     = {{EventGPT}: Event Stream Understanding with Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29139--29149},
}
MITracker: Multi-View Integration for Visual Object Tracking: Mengjie Xu,

Yitao Zhu,

Haotian Jiang,

Jiaming Li,

Zhenrong Shen,

Sheng Wang,

Haolin Huang,

Xinyu Wang,

Han Zhang,

Qing Yang,

Qian Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Mengjie and Zhu, Yitao and Jiang, Haotian and Li, Jiaming and Shen, Zhenrong and Wang, Sheng and Huang, Haolin and Wang, Xinyu and Zhang, Han and Yang, Qing and Wang, Qian},
  title     = {{MITracker}: Multi-View Integration for Visual Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27176--27185},
}
Not Only Text: Exploring Compositionality of Visual Representations in Vision-Language Models: Davide Berasi,

Matteo Farina,

Massimiliano Mancini,

Elisa Ricci,

Nicola Strisciuglio; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Berasi_2025_CVPR,
  author    = {Berasi, Davide and Farina, Matteo and Mancini, Massimiliano and Ricci, Elisa and Strisciuglio, Nicola},
  title     = {Not Only Text: Exploring Compositionality of Visual Representations in Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24917--24927},
}
Exploring Scene Affinity for Semi-Supervised LiDAR Semantic Segmentation: Chuandong Liu,

Xingxing Weng,

Shuguo Jiang,

Pengcheng Li,

Lei Yu,

Gui-Song Xia; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Chuandong and Weng, Xingxing and Jiang, Shuguo and Li, Pengcheng and Yu, Lei and Xia, Gui-Song},
  title     = {Exploring Scene Affinity for Semi-Supervised {LiDAR} Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27380--27389},
}
Minority-Focused Text-to-Image Generation via Prompt Optimization: Soobin Um,

Jong Chul Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Um_2025_CVPR,
  author    = {Um, Soobin and Ye, Jong Chul},
  title     = {Minority-Focused Text-to-Image Generation via Prompt Optimization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20926--20936},
}
MANTA: A Large-Scale Multi-View and Visual-Text Anomaly Detection Dataset for Tiny Objects: Lei Fan,

Dongdong Fan,

Zhiguang Hu,

Yiwen Ding,

Donglin Di,

Kai Yi,

Maurice Pagnucco,

Yang Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Lei and Fan, Dongdong and Hu, Zhiguang and Ding, Yiwen and Di, Donglin and Yi, Kai and Pagnucco, Maurice and Song, Yang},
  title     = {{MANTA}: A Large-Scale Multi-View and Visual-Text Anomaly Detection Dataset for Tiny Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25518--25527},
}
SCSegamba: Lightweight Structure-Aware Vision Mamba for Crack Segmentation in Structures: Hui Liu,

Chen Jia,

Fan Shi,

Xu Cheng,

Shengyong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Hui and Jia, Chen and Shi, Fan and Cheng, Xu and Chen, Shengyong},
  title     = {{SCSegamba}: Lightweight Structure-Aware Vision {Mamba} for Crack Segmentation in Structures},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29406--29416},
}
Collaborative Tree Search for Enhancing Embodied Multi-Agent Collaboration: Lizheng Zu,

Lin Lin,

Song Fu,

Na Zhao,

Pan Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Zu_2025_CVPR,
  author    = {Zu, Lizheng and Lin, Lin and Fu, Song and Zhao, Na and Zhou, Pan},
  title     = {Collaborative Tree Search for Enhancing Embodied Multi-Agent Collaboration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29513--29522},
}
Text-Driven Fashion Image Editing with Compositional Concept Learning and Counterfactual Abduction: Shanshan Huang,

Haoxuan Li,

Chunyuan Zheng,

Mingyuan Ge,

Wei Gao,

Lei Wang,

Li Liu; [pdf]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Shanshan and Li, Haoxuan and Zheng, Chunyuan and Ge, Mingyuan and Gao, Wei and Wang, Lei and Liu, Li},
  title     = {Text-Driven Fashion Image Editing with Compositional Concept Learning and Counterfactual Abduction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28726--28735},
}
Adapting Text-to-Image Generation with Feature Difference Instruction for Generic Image Restoration: Chao Wang,

Hehe Fan,

Huichen Yang,

Sarvnaz Karimi,

Lina Yao,

Yi Yang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Chao and Fan, Hehe and Yang, Huichen and Karimi, Sarvnaz and Yao, Lina and Yang, Yi},
  title     = {Adapting Text-to-Image Generation with Feature Difference Instruction for Generic Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23539--23550},
}
ReCon: Enhancing True Correspondence Discrimination through Relation Consistency for Robust Noisy Correspondence Learning: Quanxing Zha,

Xin Liu,

Shu-Juan Peng,

Yiu-ming Cheung,

Xing Xu,

Nannan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zha_2025_CVPR,
  author    = {Zha, Quanxing and Liu, Xin and Peng, Shu-Juan and Cheung, Yiu-ming and Xu, Xing and Wang, Nannan},
  title     = {{ReCon}: Enhancing True Correspondence Discrimination through Relation Consistency for Robust Noisy Correspondence Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29680--29689},
}
Preconditioners for the Stochastic Training of Neural Fields: Shin-Fang Chng,

Hemanth Saratchandran,

Simon Lucey; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chng_2025_CVPR,
  author    = {Chng, Shin-Fang and Saratchandran, Hemanth and Lucey, Simon},
  title     = {Preconditioners for the Stochastic Training of Neural Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27222--27232},
}
ECBench: Can Multi-modal Foundation Models Understand the Egocentric World? A Holistic Embodied Cognition Benchmark: Ronghao Dang,

Yuqian Yuan,

Wenqi Zhang,

Yifei Xin,

Boqiang Zhang,

Long Li,

Liuyi Wang,

Qinyang Zeng,

Xin Li,

Lidong Bing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dang_2025_CVPR,
  author    = {Dang, Ronghao and Yuan, Yuqian and Zhang, Wenqi and Xin, Yifei and Zhang, Boqiang and Li, Long and Wang, Liuyi and Zeng, Qinyang and Li, Xin and Bing, Lidong},
  title     = {{ECBench}: Can Multi-modal Foundation Models Understand the Egocentric World? {A} Holistic Embodied Cognition Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24593--24602},
}
SfM-Free 3D Gaussian Splatting via Hierarchical Training: Bo Ji,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2025_CVPR,
  author    = {Ji, Bo and Yao, Angela},
  title     = {{SfM-Free} {3D} {Gaussian} Splatting via Hierarchical Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21654--21663},
}
CASAGPT: Cuboid Arrangement and Scene Assembly for Interior Design: Weitao Feng,

Hang Zhou,

Jing Liao,

Li Cheng,

Wenbo Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Weitao and Zhou, Hang and Liao, Jing and Cheng, Li and Zhou, Wenbo},
  title     = {{CASAGPT}: Cuboid Arrangement and Scene Assembly for Interior Design},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29173--29182},
}
MINIMA: Modality Invariant Image Matching: Jiangwei Ren,

Xingyu Jiang,

Zizhuo Li,

Dingkang Liang,

Xin Zhou,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Jiangwei and Jiang, Xingyu and Li, Zizhuo and Liang, Dingkang and Zhou, Xin and Bai, Xiang},
  title     = {{MINIMA}: Modality Invariant Image Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23059--23068},
}
3D Convex Splatting: Radiance Field Rendering with 3D Smooth Convexes: Jan Held,

Renaud Vandeghen,

Abdullah Hamdi,

Adrien Deliege,

Anthony Cioppa,

Silvio Giancola,

Andrea Vedaldi,

Bernard Ghanem,

Marc Van Droogenbroeck; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Held_2025_CVPR,
  author    = {Held, Jan and Vandeghen, Renaud and Hamdi, Abdullah and Deliege, Adrien and Cioppa, Anthony and Giancola, Silvio and Vedaldi, Andrea and Ghanem, Bernard and Van Droogenbroeck, Marc},
  title     = {{3D} Convex Splatting: Radiance Field Rendering with {3D} Smooth Convexes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21360--21369},
}
3D Prior Is All You Need: Cross-Task Few-shot 2D Gaze Estimation: Yihua Cheng,

Hengfei Wang,

Zhongqun Zhang,

Yang Yue,

Boeun Kim,

Feng Lu,

Hyung Jin Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Yihua and Wang, Hengfei and Zhang, Zhongqun and Yue, Yang and Kim, Boeun and Lu, Feng and Chang, Hyung Jin},
  title     = {{3D} Prior Is All You Need: Cross-Task Few-shot {2D} Gaze Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23891--23900},
}
SeriesBench: A Benchmark for Narrative-Driven Drama Series Understanding: Chenkai Zhang,

Yiming Lei,

Zeming Liu,

Haitao Leng,

ShaoGuo Liu,

Tingting Gao,

Qingjie Liu,

Yunhong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Chenkai and Lei, Yiming and Liu, Zeming and Leng, Haitao and Liu, ShaoGuo and Gao, Tingting and Liu, Qingjie and Wang, Yunhong},
  title     = {{SeriesBench}: A Benchmark for Narrative-Driven Drama Series Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28995--29004},
}
Weakly Supervised Temporal Action Localization via Dual-Prior Collaborative Learning Guided by Multimodal Large Language Models: Quan Zhang,

Jinwei Fang,

Rui Yuan,

Xi Tang,

Yuxin Qi,

Ke Zhang,

Chun Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Quan and Fang, Jinwei and Yuan, Rui and Tang, Xi and Qi, Yuxin and Zhang, Ke and Yuan, Chun},
  title     = {Weakly Supervised Temporal Action Localization via Dual-Prior Collaborative Learning Guided by Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24139--24148},
}
GliaNet: Adaptive Neural Network Structure Learning with Glia-Driven: Mengqiao Han,

Liyuan Pan,

Xiabi Liu; [pdf] [supp]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Mengqiao and Pan, Liyuan and Liu, Xiabi},
  title     = {{GliaNet}: Adaptive Neural Network Structure Learning with Glia-Driven},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25240--25249},
}
EntitySAM: Segment Everything in Video: Mingqiao Ye,

Seoung Wug Oh,

Lei Ke,

Joon-Young Lee; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Mingqiao and Oh, Seoung Wug and Ke, Lei and Lee, Joon-Young},
  title     = {{EntitySAM}: Segment Everything in Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24234--24243},
}
GS-2DGS: Geometrically Supervised 2DGS for Reflective Object Reconstruction: Jinguang Tong,

Xuesong Li,

Fahira Afzal Maken,

Sundaram Muthu,

Lars Petersson,

Chuong Nguyen,

Hongdong Li; [pdf] [supp]
[bibtex]
@InProceedings{Tong_2025_CVPR,
  author    = {Tong, Jinguang and Li, Xuesong and Maken, Fahira Afzal and Muthu, Sundaram and Petersson, Lars and Nguyen, Chuong and Li, Hongdong},
  title     = {{GS-2DGS}: Geometrically Supervised {2DGS} for Reflective Object Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21547--21557},
}
Video Depth Anything: Consistent Depth Estimation for Super-Long Videos: Sili Chen,

Hengkai Guo,

Shengnan Zhu,

Feihu Zhang,

Zilong Huang,

Jiashi Feng,

Bingyi Kang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Sili and Guo, Hengkai and Zhu, Shengnan and Zhang, Feihu and Huang, Zilong and Feng, Jiashi and Kang, Bingyi},
  title     = {Video Depth Anything: Consistent Depth Estimation for Super-Long Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22831--22840},
}
InstanceCap: Improving Text-to-Video Generation via Instance-aware Structured Caption: Tiehan Fan,

Kepan Nan,

Rui Xie,

Penghao Zhou,

Zhenheng Yang,

Chaoyou Fu,

Xiang Li,

Jian Yang,

Ying Tai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Tiehan and Nan, Kepan and Xie, Rui and Zhou, Penghao and Yang, Zhenheng and Fu, Chaoyou and Li, Xiang and Yang, Jian and Tai, Ying},
  title     = {{InstanceCap}: Improving Text-to-Video Generation via Instance-aware Structured Caption},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28974--28983},
}
Luminance-GS: Adapting 3D Gaussian Splatting to Challenging Lighting Conditions with View-Adaptive Curve Adjustment: Ziteng Cui,

Xuangeng Chu,

Tatsuya Harada; [pdf] [supp]
[bibtex]
@InProceedings{Cui_2025_CVPR,
  author    = {Cui, Ziteng and Chu, Xuangeng and Harada, Tatsuya},
  title     = {{Luminance-GS}: Adapting {3D} {Gaussian} Splatting to Challenging Lighting Conditions with View-Adaptive Curve Adjustment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26472--26482},
}
EventSplat: 3D Gaussian Splatting from Moving Event Cameras for Real-time Rendering: Toshiya Yura,

Ashkan Mirzaei,

Igor Gilitschenski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yura_2025_CVPR,
  author    = {Yura, Toshiya and Mirzaei, Ashkan and Gilitschenski, Igor},
  title     = {{EventSplat}: {3D} {Gaussian} Splatting from Moving Event Cameras for Real-time Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26876--26886},
}
3D Student Splatting and Scooping: Jialin Zhu,

Jiangbei Yue,

Feixiang He,

He Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Jialin and Yue, Jiangbei and He, Feixiang and Wang, He},
  title     = {{3D} {Student} Splatting and Scooping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21045--21054},
}
World-consistent Video Diffusion with Explicit 3D Modeling: Qihang Zhang,

Shuangfei Zhai,

Miguel Ángel Bautista Martin,

Kevin Miao,

Alexander Toshev,

Joshua Susskind,

Jiatao Gu; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Qihang and Zhai, Shuangfei and Bautista Martin, Miguel {\'A}ngel and Miao, Kevin and Toshev, Alexander and Susskind, Joshua and Gu, Jiatao},
  title     = {World-consistent Video Diffusion with Explicit {3D} Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21685--21695},
}
Learning Partonomic 3D Reconstruction from Image Collections: Xiaoqian Ruan,

Pei Yu,

Dian Jia,

Hyeonjeong Park,

Peixi Xiong,

Wei Tang; [pdf] [supp]
[bibtex]
@InProceedings{Ruan_2025_CVPR,
  author    = {Ruan, Xiaoqian and Yu, Pei and Jia, Dian and Park, Hyeonjeong and Xiong, Peixi and Tang, Wei},
  title     = {Learning Partonomic {3D} Reconstruction from Image Collections},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26734--26744},
}
ODA-GAN: Orthogonal Decoupling Alignment GAN Assisted by Weakly-supervised Learning for Virtual Immunohistochemistry Staining: Tong Wang,

Mingkang Wang,

Zhongze Wang,

Hongkai Wang,

Qi Xu,

Fengyu Cong,

Hongming Xu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Tong and Wang, Mingkang and Wang, Zhongze and Wang, Hongkai and Xu, Qi and Cong, Fengyu and Xu, Hongming},
  title     = {{ODA-GAN}: Orthogonal Decoupling Alignment {GAN} Assisted by Weakly-supervised Learning for Virtual Immunohistochemistry Staining},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25920--25929},
}
EVOS: Efficient Implicit Neural Training via EVOlutionary Selector: Weixiang Zhang,

Shuzhao Xie,

Chengwei Ren,

Siyi Xie,

Chen Tang,

Shijia Ge,

Mingzi Wang,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Weixiang and Xie, Shuzhao and Ren, Chengwei and Xie, Siyi and Tang, Chen and Ge, Shijia and Wang, Mingzi and Wang, Zhi},
  title     = {{EVOS}: Efficient Implicit Neural Training via {EVOlutionary} Selector},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30472--30482},
}
MEET: Towards Memory-Efficient Temporal Sparse Deep Neural Networks: Zeqi Zhu,

Ibrahim Batuhan Akkaya,

Luc Waeijen,

Egor Bondarev,

Arash Pourtaherian,

Orlando Moreira; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Zeqi and Akkaya, Ibrahim Batuhan and Waeijen, Luc and Bondarev, Egor and Pourtaherian, Arash and Moreira, Orlando},
  title     = {{MEET}: Towards Memory-Efficient Temporal Sparse Deep Neural Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29309--29320},
}
Probabilistic Prompt Distribution Learning for Animal Pose Estimation: Jiyong Rao,

Brian Nlong Zhao,

Yu Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Rao_2025_CVPR,
  author    = {Rao, Jiyong and Zhao, Brian Nlong and Wang, Yu},
  title     = {Probabilistic Prompt Distribution Learning for Animal Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29438--29447},
}
Mitigating Object Hallucinations in Large Vision-Language Models with Assembly of Global and Local Attention: Wenbin An,

Feng Tian,

Sicong Leng,

Jiahao Nie,

Haonan Lin,

Qianying Wang,

Ping Chen,

Xiaoqin Zhang,

Shijian Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2025_CVPR,
  author    = {An, Wenbin and Tian, Feng and Leng, Sicong and Nie, Jiahao and Lin, Haonan and Wang, Qianying and Chen, Ping and Zhang, Xiaoqin and Lu, Shijian},
  title     = {Mitigating Object Hallucinations in Large Vision-Language Models with Assembly of Global and Local Attention},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29915--29926},
}
UniSTD: Towards Unified Spatio-Temporal Learning across Diverse Disciplines: Chen Tang,

Xinzhu Ma,

Encheng Su,

Xiufeng Song,

Xiaohong Liu,

Wei-Hong Li,

Lei Bai,

Wanli Ouyang,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Chen and Ma, Xinzhu and Su, Encheng and Song, Xiufeng and Liu, Xiaohong and Li, Wei-Hong and Bai, Lei and Ouyang, Wanli and Yue, Xiangyu},
  title     = {{UniSTD}: Towards Unified Spatio-Temporal Learning across Diverse Disciplines},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29213--29224},
}
Mani-GS: Gaussian Splatting Manipulation with Triangular Mesh: Xiangjun Gao,

Xiaoyu Li,

Yiyu Zhuang,

Qi Zhang,

Wenbo Hu,

Chaopeng Zhang,

Yao Yao,

Ying Shan,

Long Quan; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Xiangjun and Li, Xiaoyu and Zhuang, Yiyu and Zhang, Qi and Hu, Wenbo and Zhang, Chaopeng and Yao, Yao and Shan, Ying and Quan, Long},
  title     = {{Mani-GS}: {Gaussian} Splatting Manipulation with Triangular Mesh},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21392--21402},
}
BooW-VTON: Boosting In-the-Wild Virtual Try-On via Mask-Free Pseudo Data Training: Xuanpu Zhang,

Dan Song,

Pengxin Zhan,

Tianyu Chang,

Jianhao Zeng,

Qingguo Chen,

Weihua Luo,

An-An Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xuanpu and Song, Dan and Zhan, Pengxin and Chang, Tianyu and Zeng, Jianhao and Chen, Qingguo and Luo, Weihua and Liu, An-An},
  title     = {{BooW-VTON}: Boosting In-the-Wild Virtual Try-On via Mask-Free Pseudo Data Training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26399--26408},
}
Supervising Sound Localization by In-the-wild Egomotion: Anna Min,

Ziyang Chen,

Hang Zhao,

Andrew Owens; [pdf] [supp]
[bibtex]
@InProceedings{Min_2025_CVPR,
  author    = {Min, Anna and Chen, Ziyang and Zhao, Hang and Owens, Andrew},
  title     = {Supervising Sound Localization by In-the-wild Egomotion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23936--23946},
}
AutoLUT: LUT-Based Image Super-Resolution with Automatic Sampling and Adaptive Residual Learning: Yuheng Xu,

Shijie Yang,

Xin Liu,

Jie Liu,

Jie Tang,

Gangshan Wu; [pdf] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Yuheng and Yang, Shijie and Liu, Xin and Liu, Jie and Tang, Jie and Wu, Gangshan},
  title     = {{AutoLUT}: {LUT}-Based Image Super-Resolution with Automatic Sampling and Adaptive Residual Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23131--23140},
}
AniGS: Animatable Gaussian Avatar from a Single Image with Inconsistent Gaussian Reconstruction: Lingteng Qiu,

Shenhao Zhu,

Qi Zuo,

Xiaodong Gu,

Yuan Dong,

Junfei Zhang,

Chao Xu,

Zhe Li,

Weihao Yuan,

Liefeng Bo,

Guanying Chen,

Zilong Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2025_CVPR,
  author    = {Qiu, Lingteng and Zhu, Shenhao and Zuo, Qi and Gu, Xiaodong and Dong, Yuan and Zhang, Junfei and Xu, Chao and Li, Zhe and Yuan, Weihao and Bo, Liefeng and Chen, Guanying and Dong, Zilong},
  title     = {{AniGS}: Animatable {Gaussian} Avatar from a Single Image with Inconsistent {Gaussian} Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21148--21158},
}
IM-Portrait: Learning 3D-aware Video Diffusion for Photorealistic Talking Heads from Monocular Videos: Yuan Li,

Ziqian Bai,

Feitong Tan,

Zhaopeng Cui,

Sean Fanello,

Yinda Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yuan and Bai, Ziqian and Tan, Feitong and Cui, Zhaopeng and Fanello, Sean and Zhang, Yinda},
  title     = {{IM-Portrait}: Learning {3D}-aware Video Diffusion for Photorealistic Talking Heads from Monocular Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21107--21116},
}
DynaMoDe-NeRF: Motion-aware Deblurring Neural Radiance Field for Dynamic Scenes: Ashish Kumar,

Rajagopalan A. N.; [pdf] [supp]
[bibtex]
@InProceedings{Kumar_2025_CVPR,
  author    = {Kumar, Ashish and Rajagopalan, A. N.},
  title     = {{DynaMoDe-NeRF}: Motion-aware Deblurring Neural Radiance Field for Dynamic Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21728--21738},
}
UrbanCAD: Towards Highly Controllable and Photorealistic 3D Vehicles for Urban Scene Simulation: Yichong Lu,

Yichi Cai,

Shangzhan Zhang,

Hongyu Zhou,

Haoji Hu,

Huimin Yu,

Andreas Geiger,

Yiyi Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Yichong and Cai, Yichi and Zhang, Shangzhan and Zhou, Hongyu and Hu, Haoji and Yu, Huimin and Geiger, Andreas and Liao, Yiyi},
  title     = {{UrbanCAD}: Towards Highly Controllable and Photorealistic {3D} Vehicles for Urban Scene Simulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27519--27530},
}
Diff-Palm: Realistic Palmprint Generation with Polynomial Creases and Intra-Class Variation Controllable Diffusion Models: Jianlong Jin,

Chenglong Zhao,

Ruixin Zhang,

Sheng Shang,

Jianqing Xu,

Jingyun Zhang,

ShaoMing Wang,

Yang Zhao,

Shouhong Ding,

Wei Jia,

Yunsheng Wu; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2025_CVPR,
  author    = {Jin, Jianlong and Zhao, Chenglong and Zhang, Ruixin and Shang, Sheng and Xu, Jianqing and Zhang, Jingyun and Wang, ShaoMing and Zhao, Yang and Ding, Shouhong and Jia, Wei and Wu, Yunsheng},
  title     = {{Diff-Palm}: Realistic Palmprint Generation with Polynomial Creases and Intra-Class Variation Controllable Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26367--26376},
}
Chain of Semantics Programming in 3D Gaussian Splatting Representation for 3D Vision Grounding: Jiaxin Shi,

Mingyue Xiang,

Hao Sun,

Yixuan Huang,

Zhi Weng; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Jiaxin and Xiang, Mingyue and Sun, Hao and Huang, Yixuan and Weng, Zhi},
  title     = {Chain of Semantics Programming in {3D} {Gaussian} Splatting Representation for {3D} Vision Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24560--24569},
}
MVPortrait: Text-Guided Motion and Emotion Control for Multi-view Vivid Portrait Animation: Yukang Lin,

Hokit Fung,

Jianjin Xu,

Zeping Ren,

Adela S.M. Lau,

Guosheng Yin,

Xiu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Yukang and Fung, Hokit and Xu, Jianjin and Ren, Zeping and Lau, Adela S. M. and Yin, Guosheng and Li, Xiu},
  title     = {{MVPortrait}: Text-Guided Motion and Emotion Control for Multi-view Vivid Portrait Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26242--26252},
}
Accelerating Multimodal Large Language Models by Searching Optimal Vision Token Reduction: Shiyu Zhao,

Zhenting Wang,

Felix Juefei-Xu,

Xide Xia,

Miao Liu,

Xiaofang Wang,

Mingfu Liang,

Ning Zhang,

Dimitris N. Metaxas,

Licheng Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Shiyu and Wang, Zhenting and Juefei-Xu, Felix and Xia, Xide and Liu, Miao and Wang, Xiaofang and Liang, Mingfu and Zhang, Ning and Metaxas, Dimitris N. and Yu, Licheng},
  title     = {Accelerating Multimodal Large Language Models by Searching Optimal Vision Token Reduction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29869--29879},
}
Matrix-Free Shared Intrinsics Bundle Adjustment: Daniel Safari; [pdf]
[bibtex]
@InProceedings{Safari_2025_CVPR,
  author    = {Safari, Daniel},
  title     = {Matrix-Free Shared Intrinsics Bundle Adjustment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27017--27026},
}
Uncertainty-Instructed Structure Injection for Generalizable HD Map Construction: Xiaolu Liu,

Ruizi Yang,

Song Wang,

Wentong Li,

Junbo Chen,

Jianke Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xiaolu and Yang, Ruizi and Wang, Song and Li, Wentong and Chen, Junbo and Zhu, Jianke},
  title     = {Uncertainty-Instructed Structure Injection for Generalizable {HD} Map Construction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22359--22368},
}
Color Alignment in Diffusion: Ka Chun Shum,

Binh-Son Hua,

Duc Thanh Nguyen,

Sai-Kit Yeung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shum_2025_CVPR,
  author    = {Shum, Ka Chun and Hua, Binh-Son and Nguyen, Duc Thanh and Yeung, Sai-Kit},
  title     = {Color Alignment in Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28446--28455},
}
LLAVIDAL: A Large LAnguage VIsion Model for Daily Activities of Living: Dominick Reilly,

Rajatsubhra Chakraborty,

Arkaprava Sinha,

Manish Kumar Govind,

Pu Wang,

Francois Bremond,

Le Xue,

Srijan Das; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Reilly_2025_CVPR,
  author    = {Reilly, Dominick and Chakraborty, Rajatsubhra and Sinha, Arkaprava and Govind, Manish Kumar and Wang, Pu and Bremond, Francois and Xue, Le and Das, Srijan},
  title     = {{LLAVIDAL}: A Large {LAnguage} {VIsion} Model for Daily Activities of Living},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24297--24308},
}
Language-Guided Salient Object Ranking: Fang Liu,

Yuhao Liu,

Ke Xu,

Shuquan Ye,

Gerhard Petrus Hancke,

Rynson W. H. Lau; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Fang and Liu, Yuhao and Xu, Ke and Ye, Shuquan and Hancke, Gerhard Petrus and Lau, Rynson W. H.},
  title     = {Language-Guided Salient Object Ranking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29803--29813},
}
Towards More General Video-based Deepfake Detection through Facial Component Guided Adaptation for Foundation Model: Yue-Hua Han,

Tai-Ming Huang,

Kai-Lung Hua,

Jun-Cheng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Yue-Hua and Huang, Tai-Ming and Hua, Kai-Lung and Chen, Jun-Cheng},
  title     = {Towards More General Video-based Deepfake Detection through Facial Component Guided Adaptation for Foundation Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22995--23005},
}
SP3D: Boosting Sparsely-Supervised 3D Object Detection via Accurate Cross-Modal Semantic Prompts: Shijia Zhao,

Qiming Xia,

Xusheng Guo,

Pufan Zou,

Maoji Zheng,

Hai Wu,

Chenglu Wen,

Cheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Shijia and Xia, Qiming and Guo, Xusheng and Zou, Pufan and Zheng, Maoji and Wu, Hai and Wen, Chenglu and Wang, Cheng},
  title     = {{SP3D}: Boosting Sparsely-Supervised {3D} Object Detection via Accurate Cross-Modal Semantic Prompts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29374--29384},
}
VoCo-LLaMA: Towards Vision Compression with Large Language Models: Xubing Ye,

Yukang Gan,

Xiaoke Huang,

Yixiao Ge,

Yansong Tang; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2025_CVPR,
  author    = {Ye, Xubing and Gan, Yukang and Huang, Xiaoke and Ge, Yixiao and Tang, Yansong},
  title     = {{VoCo-LLaMA}: Towards Vision Compression with Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29836--29846},
}
Focal Split: Untethered Snapshot Depth from Differential Defocus: Junjie Luo,

John Mamish,

Alan Fu,

Thomas Concannon,

Josiah Hester,

Emma Alexander,

Qi Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Junjie and Mamish, John and Fu, Alan and Concannon, Thomas and Hester, Josiah and Alexander, Emma and Guo, Qi},
  title     = {{Focal Split}: Untethered Snapshot Depth from Differential Defocus},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26965--26974},
}
PURA: Parameter Update-Recovery Test-Time Adaption for RGB-T Tracking: Zekai Shao,

Yufan Hu,

Bin Fan,

Hongmin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Shao_2025_CVPR,
  author    = {Shao, Zekai and Hu, Yufan and Fan, Bin and Liu, Hongmin},
  title     = {{PURA}: Parameter Update-Recovery Test-Time Adaption for {RGB-T} Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22089--22098},
}
Towards All-in-One Medical Image Re-Identification: Yuan Tian,

Kaiyuan Ji,

Rongzhao Zhang,

Yankai Jiang,

Chunyi Li,

Xiaosong Wang,

Guangtao Zhai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Yuan and Ji, Kaiyuan and Zhang, Rongzhao and Jiang, Yankai and Li, Chunyi and Wang, Xiaosong and Zhai, Guangtao},
  title     = {Towards All-in-One Medical Image Re-Identification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30774--30786},
}
Integral Fast Fourier Color Constancy: Wenjun Wei,

Yanlin Qian,

Huaian Chen,

Junkang Dai,

Yi Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Wenjun and Qian, Yanlin and Chen, Huaian and Dai, Junkang and Jin, Yi},
  title     = {Integral Fast {Fourier} Color Constancy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26420--26429},
}
ResCLIP: Residual Attention for Training-free Dense Vision-language Inference: Yuhang Yang,

Jinhong Deng,

Wen Li,

Lixin Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Yuhang and Deng, Jinhong and Li, Wen and Duan, Lixin},
  title     = {{ResCLIP}: Residual Attention for Training-free Dense Vision-language Inference},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29968--29978},
}
Dispider: Enabling Video LLMs with Active Real-Time Interaction via Disentangled Perception, Decision, and Reaction: Rui Qian,

Shuangrui Ding,

Xiaoyi Dong,

Pan Zhang,

Yuhang Zang,

Yuhang Cao,

Dahua Lin,

Jiaqi Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Qian_2025_CVPR,
  author    = {Qian, Rui and Ding, Shuangrui and Dong, Xiaoyi and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and Lin, Dahua and Wang, Jiaqi},
  title     = {{Dispider}: Enabling Video {LLMs} with Active Real-Time Interaction via Disentangled Perception, Decision, and Reaction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24045--24055},
}
Bayesian Test-Time Adaptation for Vision-Language Models: Lihua Zhou,

Mao Ye,

Shuaifeng Li,

Nianxin Li,

Xiatian Zhu,

Lei Deng,

Hongbin Liu,

Zhen Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Lihua and Ye, Mao and Li, Shuaifeng and Li, Nianxin and Zhu, Xiatian and Deng, Lei and Liu, Hongbin and Lei, Zhen},
  title     = {{Bayesian} Test-Time Adaptation for Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29999--30009},
}
Causal Composition Diffusion Model for Closed-loop Traffic Generation: Haohong Lin,

Xin Huang,

Tung Phan,

David Hayden,

Huan Zhang,

Ding Zhao,

Siddhartha Srinivasa,

Eric Wolff,

Hongge Chen; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Haohong and Huang, Xin and Phan, Tung and Hayden, David and Zhang, Huan and Zhao, Ding and Srinivasa, Siddhartha and Wolff, Eric and Chen, Hongge},
  title     = {Causal Composition Diffusion Model for Closed-loop Traffic Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27542--27552},
}
Change3D: Revisiting Change Detection and Captioning from A Video Modeling Perspective: Duowang Zhu,

Xiaohu Huang,

Haiyan Huang,

Hao Zhou,

Zhenfeng Shao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Duowang and Huang, Xiaohu and Huang, Haiyan and Zhou, Hao and Shao, Zhenfeng},
  title     = {{Change3D}: Revisiting Change Detection and Captioning from A Video Modeling Perspective},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24011--24022},
}
Attribute-formed Class-specific Concept Space: Endowing Language Bottleneck Model with Better Interpretability and Scalability: Jianyang Zhang,

Qianli Luo,

Guowu Yang,

Wenjing Yang,

Weide Liu,

Guosheng Lin,

Fengmao Lv; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jianyang and Luo, Qianli and Yang, Guowu and Yang, Wenjing and Liu, Weide and Lin, Guosheng and Lv, Fengmao},
  title     = {Attribute-formed Class-specific Concept Space: Endowing Language Bottleneck Model with Better Interpretability and Scalability},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30291--30300},
}
Customized Condition Controllable Generation for Video Soundtrack: Fan Qi,

Kunsheng Ma,

Changsheng Xu; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2025_CVPR,
  author    = {Qi, Fan and Ma, Kunsheng and Xu, Changsheng},
  title     = {Customized Condition Controllable Generation for Video Soundtrack},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23914--23924},
}
ProjAttacker: A Configurable Physical Adversarial Attack for Face Recognition via Projector: Yuanwei Liu,

Hui Wei,

Chengyu Jia,

Ruqi Xiao,

Weijian Ruan,

Xingxing Wei,

Joey Tianyi Zhou,

Zheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yuanwei and Wei, Hui and Jia, Chengyu and Xiao, Ruqi and Ruan, Weijian and Wei, Xingxing and Zhou, Joey Tianyi and Wang, Zheng},
  title     = {{ProjAttacker}: A Configurable Physical Adversarial Attack for Face Recognition via Projector},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21248--21257},
}
WISE: A Framework for Gigapixel Whole-Slide-Image Lossless Compression: Yu Mao,

Jun Wang,

Nan Guan,

Chun Jason Xue; [pdf] [arXiv]
[bibtex]
@InProceedings{Mao_2025_CVPR,
  author    = {Mao, Yu and Wang, Jun and Guan, Nan and Xue, Chun Jason},
  title     = {{WISE}: A Framework for Gigapixel Whole-Slide-Image Lossless Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29342--29351},
}
Gromov-Wasserstein Problem with Cyclic Symmetry: Shoichiro Takeda,

Yasunori Akagi; [pdf] [supp]
[bibtex]
@InProceedings{Takeda_2025_CVPR,
  author    = {Takeda, Shoichiro and Akagi, Yasunori},
  title     = {{Gromov-Wasserstein} Problem with Cyclic Symmetry},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21011--21020},
}
SimAvatar: Simulation-Ready Avatars with Layered Hair and Clothing: Xueting Li,

Ye Yuan,

Shalini De Mello,

Gilles Daviet,

Jonathan Leaf,

Miles Macklin,

Jan Kautz,

Umar Iqbal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xueting and Yuan, Ye and De Mello, Shalini and Daviet, Gilles and Leaf, Jonathan and Macklin, Miles and Kautz, Jan and Iqbal, Umar},
  title     = {{SimAvatar}: Simulation-Ready Avatars with Layered Hair and Clothing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26320--26330},
}
Test-Time Backdoor Detection for Object Detection Models: Hangtao Zhang,

Yichen Wang,

Shihui Yan,

Chenyu Zhu,

Ziqi Zhou,

Linshan Hou,

Shengshan Hu,

Minghui Li,

Yanjun Zhang,

Leo Yu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Hangtao and Wang, Yichen and Yan, Shihui and Zhu, Chenyu and Zhou, Ziqi and Hou, Linshan and Hu, Shengshan and Li, Minghui and Zhang, Yanjun and Zhang, Leo Yu},
  title     = {Test-Time Backdoor Detection for Object Detection Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24377--24386},
}
SDBF: Steep-Decision-Boundary Fingerprinting for Hard-Label Tampering Detection of DNN Models: Xiaofan Bai,

Shixin Li,

Xiaojing Ma,

Bin Benjamin Zhu,

Dongmei Zhang,

Linchen Yu; [pdf] [supp]
[bibtex]
@InProceedings{Bai_2025_CVPR,
  author    = {Bai, Xiaofan and Li, Shixin and Ma, Xiaojing and Zhu, Bin Benjamin and Zhang, Dongmei and Yu, Linchen},
  title     = {{SDBF}: Steep-Decision-Boundary Fingerprinting for Hard-Label Tampering Detection of {DNN} Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29278--29287},
}
Distilling Multi-modal Large Language Models for Autonomous Driving: Deepti Hegde,

Rajeev Yasarla,

Hong Cai,

Shizhong Han,

Apratim Bhattacharyya,

Shweta Mahajan,

Litian Liu,

Risheek Garrepalli,

Vishal M. Patel,

Fatih Porikli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hegde_2025_CVPR,
  author    = {Hegde, Deepti and Yasarla, Rajeev and Cai, Hong and Han, Shizhong and Bhattacharyya, Apratim and Mahajan, Shweta and Liu, Litian and Garrepalli, Risheek and Patel, Vishal M. and Porikli, Fatih},
  title     = {Distilling Multi-modal Large Language Models for Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27575--27585},
}
HD-EPIC: A Highly-Detailed Egocentric Video Dataset: Toby Perrett,

Ahmad Darkhalil,

Saptarshi Sinha,

Omar Emara,

Sam Pollard,

Kranti Kumar Parida,

Kaiting Liu,

Prajwal Gatti,

Siddhant Bansal,

Kevin Flanagan,

Jacob Chalk,

Zhifan Zhu,

Rhodri Guerrier,

Fahd Abdelazim,

Bin Zhu,

Davide Moltisanti,

Michael Wray,

Hazel Doughty,

Dima Damen; [pdf] [supp]
[bibtex]
@InProceedings{Perrett_2025_CVPR,
  author    = {Perrett, Toby and Darkhalil, Ahmad and Sinha, Saptarshi and Emara, Omar and Pollard, Sam and Parida, Kranti Kumar and Liu, Kaiting and Gatti, Prajwal and Bansal, Siddhant and Flanagan, Kevin and Chalk, Jacob and Zhu, Zhifan and Guerrier, Rhodri and Abdelazim, Fahd and Zhu, Bin and Moltisanti, Davide and Wray, Michael and Doughty, Hazel and Damen, Dima},
  title     = {{HD-EPIC}: A Highly-Detailed Egocentric Video Dataset},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23901--23913},
}
Advancing Myopia To Holism: Fully Contrastive Language-Image Pre-training: Haicheng Wang,

Chen Ju,

Weixiong Lin,

Shuai Xiao,

Mengting Chen,

Yixuan Huang,

Chang Liu,

Mingshuai Yao,

Jinsong Lan,

Ying Chen,

Qingwen Liu,

Yanfeng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Haicheng and Ju, Chen and Lin, Weixiong and Xiao, Shuai and Chen, Mengting and Huang, Yixuan and Liu, Chang and Yao, Mingshuai and Lan, Jinsong and Chen, Ying and Liu, Qingwen and Wang, Yanfeng},
  title     = {Advancing Myopia To Holism: Fully Contrastive Language-Image Pre-training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29791--29802},
}
H-MoRe: Learning Human-centric Motion Representation for Action Analysis: Zhanbo Huang,

Xiaoming Liu,

Yu Kong; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zhanbo and Liu, Xiaoming and Kong, Yu},
  title     = {{H-MoRe}: Learning Human-centric Motion Representation for Action Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22702--22713},
}
Hierarchical Compact Clustering Attention (COCA) for Unsupervised Object-Centric Learning: Can Kucuksozen,

Yucel Yemez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kucuksozen_2025_CVPR,
  author    = {Kucuksozen, Can and Yemez, Yucel},
  title     = {Hierarchical Compact Clustering Attention ({COCA}) for Unsupervised Object-Centric Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25388--25398},
}
Effortless Active Labeling for Long-Term Test-Time Adaptation: Guowei Wang,

Changxing Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Guowei and Ding, Changxing},
  title     = {Effortless Active Labeling for Long-Term Test-Time Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25633--25642},
}
Leveraging Temporal Cues for Semi-Supervised Multi-View 3D Object Detection: Jinhyung Park,

Navyata Sanghvi,

Hiroki Adachi,

Yoshihisa Shibata,

Shawn Hunt,

Shinya Tanaka,

Hironobu Fujiyoshi,

Kris Kitani; [pdf] [supp]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Jinhyung and Sanghvi, Navyata and Adachi, Hiroki and Shibata, Yoshihisa and Hunt, Shawn and Tanaka, Shinya and Fujiyoshi, Hironobu and Kitani, Kris},
  title     = {Leveraging Temporal Cues for Semi-Supervised Multi-View {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27401--27412},
}
Logits DeConfusion with CLIP for Few-Shot Learning: Shuo Li,

Fang Liu,

Zehua Hao,

Xinyi Wang,

Lingling Li,

Xu Liu,

Puhua Chen,

Wenping Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Shuo and Liu, Fang and Hao, Zehua and Wang, Xinyi and Li, Lingling and Liu, Xu and Chen, Puhua and Ma, Wenping},
  title     = {Logits {DeConfusion} with {CLIP} for Few-Shot Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25411--25421},
}
Pay Attention to the Foreground in Object-Centric Learning: Pinzhuo Tian,

Shengjie Yang,

Hang Yu,

Alex Kot; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Pinzhuo and Yang, Shengjie and Yu, Hang and Kot, Alex},
  title     = {Pay Attention to the Foreground in Object-Centric Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30281--30290},
}
FluidNexus: 3D Fluid Reconstruction and Prediction from a Single Video: Yue Gao,

Hong-Xing Yu,

Bo Zhu,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Yue and Yu, Hong-Xing and Zhu, Bo and Wu, Jiajun},
  title     = {{FluidNexus}: {3D} Fluid Reconstruction and Prediction from a Single Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26091--26101},
}
DeformCL: Learning Deformable Centerline Representation for Vessel Extraction in 3D Medical Image: Ziwei Zhao,

Zhixing Zhang,

Yuhang Liu,

Zhao Zhang,

Haojun Yu,

Dong Wang,

Liwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Ziwei and Zhang, Zhixing and Liu, Yuhang and Zhang, Zhao and Yu, Haojun and Wang, Dong and Wang, Liwei},
  title     = {{DeformCL}: Learning Deformable Centerline Representation for Vessel Extraction in {3D} Medical Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30896--30905},
}
OCRT: Boosting Foundation Models in the Open World with Object-Concept-Relation Triad: Luyao Tang,

Yuxuan Yuan,

Chaoqi Chen,

Zeyu Zhang,

Yue Huang,

Kun Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Luyao and Yuan, Yuxuan and Chen, Chaoqi and Zhang, Zeyu and Huang, Yue and Zhang, Kun},
  title     = {{OCRT}: Boosting Foundation Models in the Open World with Object-Concept-Relation Triad},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25422--25433},
}
SPARS3R: Semantic Prior Alignment and Regularization for Sparse 3D Reconstruction: Yutao Tang,

Yuxiang Guo,

Deming Li,

Cheng Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Yutao and Guo, Yuxiang and Li, Deming and Peng, Cheng},
  title     = {{SPARS3R}: Semantic Prior Alignment and Regularization for Sparse {3D} Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26810--26821},
}
VidBot: Learning Generalizable 3D Actions from In-the-Wild 2D Human Videos for Zero-Shot Robotic Manipulation: Hanzhi Chen,

Boyang Sun,

Anran Zhang,

Marc Pollefeys,

Stefan Leutenegger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Hanzhi and Sun, Boyang and Zhang, Anran and Pollefeys, Marc and Leutenegger, Stefan},
  title     = {{VidBot}: Learning Generalizable {3D} Actions from In-the-Wild {2D} Human Videos for Zero-Shot Robotic Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27661--27672},
}
Which Viewpoint Shows it Best? Language for Weakly Supervising View Selection in Multi-view Instructional Videos: Sagnik Majumder,

Tushar Nagarajan,

Ziad Al-Halah,

Reina Pradhan,

Kristen Grauman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Majumder_2025_CVPR,
  author    = {Majumder, Sagnik and Nagarajan, Tushar and Al-Halah, Ziad and Pradhan, Reina and Grauman, Kristen},
  title     = {Which Viewpoint Shows it Best? Language for Weakly Supervising View Selection in Multi-view Instructional Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29016--29028},
}
Adaptive Keyframe Sampling for Long Video Understanding: Xi Tang,

Jihao Qiu,

Lingxi Xie,

Yunjie Tian,

Jianbin Jiao,

Qixiang Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Xi and Qiu, Jihao and Xie, Lingxi and Tian, Yunjie and Jiao, Jianbin and Ye, Qixiang},
  title     = {Adaptive Keyframe Sampling for Long Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29118--29128},
}
Person De-reidentification: A Variation-guided Identity Shift Modeling: Yi-Xing Peng,

Yu-Ming Tang,

Kun-Yu Lin,

Qize Yang,

Jingke Meng,

Xihan Wei,

Wei-Shi Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Peng_2025_CVPR,
  author    = {Peng, Yi-Xing and Tang, Yu-Ming and Lin, Kun-Yu and Yang, Qize and Meng, Jingke and Wei, Xihan and Zheng, Wei-Shi},
  title     = {Person De-reidentification: A Variation-guided Identity Shift Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29331--29341},
}
DiGIT: Multi-Dilated Gated Encoder and Central-Adjacent Region Integrated Decoder for Temporal Action Detection Transformer: Ho-Joong Kim,

Yearang Lee,

Jung-Ho Hong,

Seong-Whan Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Ho-Joong and Lee, Yearang and Hong, Jung-Ho and Lee, Seong-Whan},
  title     = {{DiGIT}: Multi-Dilated Gated Encoder and Central-Adjacent Region Integrated Decoder for Temporal Action Detection Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24286--24296},
}
Florence-VL: Enhancing Vision-Language Models with Generative Vision Encoder and Depth-Breadth Fusion: Jiuhai Chen,

Jianwei Yang,

Haiping Wu,

Dianqi Li,

Jianfeng Gao,

Tianyi Zhou,

Bin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jiuhai and Yang, Jianwei and Wu, Haiping and Li, Dianqi and Gao, Jianfeng and Zhou, Tianyi and Xiao, Bin},
  title     = {{Florence-VL}: Enhancing Vision-Language Models with Generative Vision Encoder and Depth-Breadth Fusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24928--24938},
}
Realistic Test-Time Adaptation of Vision-Language Models: Maxime Zanella,

Clément Fuchs,

Christophe De Vleeschouwer,

Ismail Ben Ayed; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zanella_2025_CVPR,
  author    = {Zanella, Maxime and Fuchs, Cl{\'e}ment and De Vleeschouwer, Christophe and Ben Ayed, Ismail},
  title     = {Realistic Test-Time Adaptation of Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25103--25112},
}
SelfSplat: Pose-Free and 3D Prior-Free Generalizable 3D Gaussian Splatting: Gyeongjin Kang,

Jisang Yoo,

Jihyeon Park,

Seungtae Nam,

Hyeonsoo Im,

Sangheon Shin,

Sangpil Kim,

Eunbyung Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2025_CVPR,
  author    = {Kang, Gyeongjin and Yoo, Jisang and Park, Jihyeon and Nam, Seungtae and Im, Hyeonsoo and Shin, Sangheon and Kim, Sangpil and Park, Eunbyung},
  title     = {{SelfSplat}: Pose-Free and {3D} Prior-Free Generalizable {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22012--22022},
}
Enhancing Virtual Try-On with Synthetic Pairs and Error-Aware Noise Scheduling: Nannan Li,

Kevin J. Shih,

Bryan A. Plummer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Nannan and Shih, Kevin J. and Plummer, Bryan A.},
  title     = {Enhancing Virtual Try-On with Synthetic Pairs and Error-Aware Noise Scheduling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21238--21247},
}
Exploring Simple Open-Vocabulary Semantic Segmentation: Zihang Lai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Zihang},
  title     = {Exploring Simple Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30221--30230},
}
MP-GUI: Modality Perception with MLLMs for GUI Understanding: Ziwei Wang,

Weizhi Chen,

Leyang Yang,

Sheng Zhou,

Shengchu Zhao,

Hanbei Zhan,

Jiongchao Jin,

Liangcheng Li,

Zirui Shao,

Jiajun Bu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Ziwei and Chen, Weizhi and Yang, Leyang and Zhou, Sheng and Zhao, Shengchu and Zhan, Hanbei and Jin, Jiongchao and Li, Liangcheng and Shao, Zirui and Bu, Jiajun},
  title     = {{MP-GUI}: Modality Perception with {MLLMs} for {GUI} Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29711--29721},
}
Improving Adversarial Transferability on Vision Transformers via Forward Propagation Refinement: Yuchen Ren,

Zhengyu Zhao,

Chenhao Lin,

Bo Yang,

Lu Zhou,

Zhe Liu,

Chao Shen; [pdf] [arXiv]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Yuchen and Zhao, Zhengyu and Lin, Chenhao and Yang, Bo and Zhou, Lu and Liu, Zhe and Shen, Chao},
  title     = {Improving Adversarial Transferability on Vision Transformers via Forward Propagation Refinement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25071--25080},
}
Seeing What Matters: Empowering CLIP with Patch Generation-to-Selection: Gensheng Pei,

Tao Chen,

Yujia Wang,

Xinhao Cai,

Xiangbo Shu,

Tianfei Zhou,

Yazhou Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pei_2025_CVPR,
  author    = {Pei, Gensheng and Chen, Tao and Wang, Yujia and Cai, Xinhao and Shu, Xiangbo and Zhou, Tianfei and Yao, Yazhou},
  title     = {Seeing What Matters: Empowering {CLIP} with Patch Generation-to-Selection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24862--24872},
}
Erasing Undesirable Influence in Diffusion Models: Jing Wu,

Trung Le,

Munawar Hayat,

Mehrtash Harandi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Jing and Le, Trung and Hayat, Munawar and Harandi, Mehrtash},
  title     = {Erasing Undesirable Influence in Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28263--28273},
}
Closest Neighbors are Harmful for Lightweight Masked Auto-encoders: Jian Meng,

Ahmed Hasssan,

Li Yang,

Deliang Fan,

Jinwoo Shin,

Jae-sun Seo; [pdf] [supp]
[bibtex]
@InProceedings{Meng_2025_CVPR,
  author    = {Meng, Jian and Hasssan, Ahmed and Yang, Li and Fan, Deliang and Shin, Jinwoo and Seo, Jae-sun},
  title     = {Closest Neighbors are Harmful for Lightweight Masked Auto-encoders},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25230--25239},
}
Decouple-Then-Merge: Finetune Diffusion Models as Multi-Task Learning: Qianli Ma,

Xuefei Ning,

Dongrui Liu,

Li Niu,

Linfeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Qianli and Ning, Xuefei and Liu, Dongrui and Niu, Li and Zhang, Linfeng},
  title     = {Decouple-Then-Merge: Finetune Diffusion Models as Multi-Task Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23281--23291},
}
HELVIPAD: A Real-World Dataset for Omnidirectional Stereo Depth Estimation: Mehdi Zayene,

Jannik Endres,

Albias Havolli,

Charles Corbière,

Salim Cherkaoui,

Alexandre Kontouli,

Alexandre Alahi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zayene_2025_CVPR,
  author    = {Zayene, Mehdi and Endres, Jannik and Havolli, Albias and Corbi{\`e}re, Charles and Cherkaoui, Salim and Kontouli, Alexandre and Alahi, Alexandre},
  title     = {{HELVIPAD}: A Real-World Dataset for Omnidirectional Stereo Depth Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26975--26984},
}
Towards Enhanced Image Inpainting: Mitigating Unwanted Object Insertion and Preserving Color Consistency: Yikai Wang,

Chenjie Cao,

Junqiu Yu,

Ke Fan,

Xiangyang Xue,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yikai and Cao, Chenjie and Yu, Junqiu and Fan, Ke and Xue, Xiangyang and Fu, Yanwei},
  title     = {Towards Enhanced Image Inpainting: Mitigating Unwanted Object Insertion and Preserving Color Consistency},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23237--23248},
}
Practical Solutions to the Relative Pose of Three Calibrated Cameras: Charalambos Tzamos,

Viktor Kocur,

Yaqing Ding,

Daniel Barath,

Zuzana Berger Haladova,

Torsten Sattler,

Zuzana Kukelova; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tzamos_2025_CVPR,
  author    = {Tzamos, Charalambos and Kocur, Viktor and Ding, Yaqing and Barath, Daniel and Haladova, Zuzana Berger and Sattler, Torsten and Kukelova, Zuzana},
  title     = {Practical Solutions to the Relative Pose of Three Calibrated Cameras},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21913--21923},
}
PARC: A Quantitative Framework Uncovering the Symmetries within Vision Language Models: Jenny Schmalfuss,

Nadine Chang,

Vibashan VS,

Maying Shen,

Andres Bruhn,

Jose M. Alvarez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schmalfuss_2025_CVPR,
  author    = {Schmalfuss, Jenny and Chang, Nadine and VS, Vibashan and Shen, Maying and Bruhn, Andres and Alvarez, Jose M.},
  title     = {{PARC}: A Quantitative Framework Uncovering the Symmetries within Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25081--25091},
}
RoboTwin: Dual-Arm Robot Benchmark with Generative Digital Twins: Yao Mu,

Tianxing Chen,

Zanxin Chen,

Shijia Peng,

Zhiqian Lan,

Zeyu Gao,

Zhixuan Liang,

Qiaojun Yu,

Yude Zou,

Mingkun Xu,

Lunkai Lin,

Zhiqiang Xie,

Mingyu Ding,

Ping Luo; [pdf] [supp]
[bibtex]
@InProceedings{Mu_2025_CVPR,
  author    = {Mu, Yao and Chen, Tianxing and Chen, Zanxin and Peng, Shijia and Lan, Zhiqian and Gao, Zeyu and Liang, Zhixuan and Yu, Qiaojun and Zou, Yude and Xu, Mingkun and Lin, Lunkai and Xie, Zhiqiang and Ding, Mingyu and Luo, Ping},
  title     = {{RoboTwin}: Dual-Arm Robot Benchmark with Generative Digital Twins},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27649--27660},
}
AnimateAnything: Consistent and Controllable Animation for Video Generation: Guojun Lei,

Chi Wang,

Rong Zhang,

Yikai Wang,

Hong Li,

Weiwei Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2025_CVPR,
  author    = {Lei, Guojun and Wang, Chi and Zhang, Rong and Wang, Yikai and Li, Hong and Xu, Weiwei},
  title     = {{AnimateAnything}: Consistent and Controllable Animation for Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27946--27956},
}
PRaDA: Projective Radial Distortion Averaging: Daniil Sinitsyn,

Linus Härenstam-Nielsen,

Daniel Cremers; [pdf] [supp]
[bibtex]
@InProceedings{Sinitsyn_2025_CVPR,
  author    = {Sinitsyn, Daniil and H{\"a}renstam-Nielsen, Linus and Cremers, Daniel},
  title     = {{PRaDA}: Projective Radial Distortion Averaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21902--21912},
}
GenAssets: Generating in-the-wild 3D Assets in Latent Space: Ze Yang,

Jingkang Wang,

Haowei Zhang,

Sivabalan Manivasagam,

Yun Chen,

Raquel Urtasun; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Ze and Wang, Jingkang and Zhang, Haowei and Manivasagam, Sivabalan and Chen, Yun and Urtasun, Raquel},
  title     = {{GenAssets}: Generating in-the-wild {3D} Assets in Latent Space},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22392--22403},
}
Low-Rank Adaptation in Multilinear Operator Networks for Security-Preserving Incremental Learning: Huu Binh Ta,

Duc Nguyen,

Quyen Tran,

Toan Tran,

Tung Pham; [pdf] [supp]
[bibtex]
@InProceedings{Ta_2025_CVPR,
  author    = {Ta, Huu Binh and Nguyen, Duc and Tran, Quyen and Tran, Toan and Pham, Tung},
  title     = {Low-Rank Adaptation in Multilinear Operator Networks for Security-Preserving Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24341--24350},
}
FiRe: Fixed-points of Restoration Priors for Solving Inverse Problems: Matthieu Terris,

Ulugbek S. Kamilov,

Thomas Moreau; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Terris_2025_CVPR,
  author    = {Terris, Matthieu and Kamilov, Ulugbek S. and Moreau, Thomas},
  title     = {{FiRe}: Fixed-points of Restoration Priors for Solving Inverse Problems},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23185--23194},
}
CoMBO: Conflict Mitigation via Branched Optimization for Class Incremental Segmentation: Kai Fang,

Anqi Zhang,

Guangyu Gao,

Jianbo Jiao,

Chi Harold Liu,

Yunchao Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2025_CVPR,
  author    = {Fang, Kai and Zhang, Anqi and Gao, Guangyu and Jiao, Jianbo and Liu, Chi Harold and Wei, Yunchao},
  title     = {{CoMBO}: Conflict Mitigation via Branched Optimization for Class Incremental Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25667--25676},
}
Recurrent Feature Mining and Keypoint Mixup Padding for Category-Agnostic Pose Estimation: Junjie Chen,

Weilong Chen,

Yifan Zuo,

Yuming Fang; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Junjie and Chen, Weilong and Zuo, Yifan and Fang, Yuming},
  title     = {Recurrent Feature Mining and Keypoint Mixup Padding for Category-Agnostic Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22035--22044},
}
Unbiasing through Textual Descriptions: Mitigating Representation Bias in Video Benchmarks: Nina Shvetsova,

Arsha Nagrani,

Bernt Schiele,

Hilde Kuehne,

Christian Rupprecht; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shvetsova_2025_CVPR,
  author    = {Shvetsova, Nina and Nagrani, Arsha and Schiele, Bernt and Kuehne, Hilde and Rupprecht, Christian},
  title     = {Unbiasing through Textual Descriptions: Mitigating Representation Bias in Video Benchmarks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29050--29059},
}
Embodied Scene Understanding for Vision Language Models via MetaVQA: Weizhen Wang,

Chenda Duan,

Zhenghao Peng,

Yuxin Liu,

Bolei Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Weizhen and Duan, Chenda and Peng, Zhenghao and Liu, Yuxin and Zhou, Bolei},
  title     = {Embodied Scene Understanding for Vision Language Models via {MetaVQA}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22453--22464},
}
Learning Temporally Consistent Video Depth from Video Diffusion Priors: Jiahao Shao,

Yuanbo Yang,

Hongyu Zhou,

Youmin Zhang,

Yujun Shen,

Vitor Guizilini,

Yue Wang,

Matteo Poggi,

Yiyi Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2025_CVPR,
  author    = {Shao, Jiahao and Yang, Yuanbo and Zhou, Hongyu and Zhang, Youmin and Shen, Yujun and Guizilini, Vitor and Wang, Yue and Poggi, Matteo and Liao, Yiyi},
  title     = {Learning Temporally Consistent Video Depth from Video Diffusion Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22841--22852},
}
Samba: A Unified Mamba-based Framework for General Salient Object Detection: Jiahao He,

Keren Fu,

Xiaohong Liu,

Qijun Zhao; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Jiahao and Fu, Keren and Liu, Xiaohong and Zhao, Qijun},
  title     = {Samba: A Unified {Mamba}-based Framework for General Salient Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25314--25324},
}
LesionLocator: Zero-Shot Universal Tumor Segmentation and Tracking in 3D Whole-Body Imaging: Maximilian Rokuss,

Yannick Kirchhoff,

Seval Akbal,

Balint Kovacs,

Saikat Roy,

Constantin Ulrich,

Tassilo Wald,

Lukas T. Rotkopf,

Heinz-Peter Schlemmer,

Klaus Maier-Hein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rokuss_2025_CVPR,
  author    = {Rokuss, Maximilian and Kirchhoff, Yannick and Akbal, Seval and Kovacs, Balint and Roy, Saikat and Ulrich, Constantin and Wald, Tassilo and Rotkopf, Lukas T. and Schlemmer, Heinz-Peter and Maier-Hein, Klaus},
  title     = {{LesionLocator}: Zero-Shot Universal Tumor Segmentation and Tracking in {3D} Whole-Body Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30872--30885},
}
DOF-GS: Adjustable Depth-of-Field 3D Gaussian Splatting for Post-Capture Refocusing, Defocus Rendering and Blur Removal: Yujie Wang,

Praneeth Chakravarthula,

Baoquan Chen; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yujie and Chakravarthula, Praneeth and Chen, Baoquan},
  title     = {{DOF-GS}: Adjustable Depth-of-Field {3D} {Gaussian} Splatting for Post-Capture Refocusing, Defocus Rendering and Blur Removal},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21297--21306},
}
The Photographer's Eye: Teaching Multimodal Large Language Models to See, and Critique Like Photographers: Daiqing Qi,

Handong Zhao,

Jing Shi,

Simon Jenni,

Yifei Fan,

Franck Dernoncourt,

Scott Cohen,

Sheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2025_CVPR,
  author    = {Qi, Daiqing and Zhao, Handong and Shi, Jing and Jenni, Simon and Fan, Yifei and Dernoncourt, Franck and Cohen, Scott and Li, Sheng},
  title     = {The Photographer's Eye: Teaching Multimodal Large Language Models to See, and Critique Like Photographers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24807--24816},
}
Synergizing Motion and Appearance: Multi-Scale Compensatory Codebooks for Talking Head Video Generation: Shuling Zhao,

Fa-Ting Hong,

Xiaoshui Huang,

Dan Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Shuling and Hong, Fa-Ting and Huang, Xiaoshui and Xu, Dan},
  title     = {Synergizing Motion and Appearance: Multi-Scale Compensatory Codebooks for Talking Head Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26232--26241},
}
GraphI2P: Image-to-Point Cloud Registration with Exploring Pattern of Correspondence via Graph Learning: Lin Bie,

Shouan Pan,

Siqi Li,

Yining Zhao,

Yue Gao; [pdf] [supp]
[bibtex]
@InProceedings{Bie_2025_CVPR,
  author    = {Bie, Lin and Pan, Shouan and Li, Siqi and Zhao, Yining and Gao, Yue},
  title     = {{GraphI2P}: Image-to-Point Cloud Registration with Exploring Pattern of Correspondence via Graph Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22161--22171},
}
SoftVQ-VAE: Efficient 1-Dimensional Continuous Tokenizer: Hao Chen,

Ze Wang,

Xiang Li,

Ximeng Sun,

Fangyi Chen,

Jiang Liu,

Jindong Wang,

Bhiksha Raj,

Zicheng Liu,

Emad Barsoum; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Hao and Wang, Ze and Li, Xiang and Sun, Ximeng and Chen, Fangyi and Liu, Jiang and Wang, Jindong and Raj, Bhiksha and Liu, Zicheng and Barsoum, Emad},
  title     = {{SoftVQ-VAE}: Efficient 1-Dimensional Continuous Tokenizer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28358--28370},
}
DPC: Dual-Prompt Collaboration for Tuning Vision-Language Models: Haoyang Li,

Liang Wang,

Chao Wang,

Jing Jiang,

Yan Peng,

Guodong Long; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Haoyang and Wang, Liang and Wang, Chao and Jiang, Jing and Peng, Yan and Long, Guodong},
  title     = {{DPC}: Dual-Prompt Collaboration for Tuning Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25623--25632},
}
AIM-Fair: Advancing Algorithmic Fairness via Selectively Fine-Tuning Biased Models with Contextual Synthetic Data: Zengqun Zhao,

Ziquan Liu,

Yu Cao,

Shaogang Gong,

Ioannis Patras; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Zengqun and Liu, Ziquan and Cao, Yu and Gong, Shaogang and Patras, Ioannis},
  title     = {{AIM-Fair}: Advancing Algorithmic Fairness via Selectively Fine-Tuning Biased Models with Contextual Synthetic Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28748--28758},
}
Robust Multi-Object 4D Generation for In-the-wild Videos: Wen-Hsuan Chu,

Lei Ke,

Jianmeng Liu,

Mingxiao Huo,

Pavel Tokmakov,

Katerina Fragkiadaki; [pdf] [supp]
[bibtex]
@InProceedings{Chu_2025_CVPR,
  author    = {Chu, Wen-Hsuan and Ke, Lei and Liu, Jianmeng and Huo, Mingxiao and Tokmakov, Pavel and Fragkiadaki, Katerina},
  title     = {Robust Multi-Object {4D} Generation for In-the-wild Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22067--22077},
}
Mono-InternVL: Pushing the Boundaries of Monolithic Multimodal Large Language Models with Endogenous Visual Pre-training: Gen Luo,

Xue Yang,

Wenhan Dou,

Zhaokai Wang,

Jiawen Liu,

Jifeng Dai,

Yu Qiao,

Xizhou Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Gen and Yang, Xue and Dou, Wenhan and Wang, Zhaokai and Liu, Jiawen and Dai, Jifeng and Qiao, Yu and Zhu, Xizhou},
  title     = {{Mono-InternVL}: Pushing the Boundaries of Monolithic Multimodal Large Language Models with Endogenous Visual Pre-training},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24960--24971},
}
FLAVC: Learned Video Compression with Feature Level Attention: Chun Zhang,

Heming Sun,

Jiro Katto; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Chun and Sun, Heming and Katto, Jiro},
  title     = {{FLAVC}: Learned Video Compression with Feature Level Attention},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28019--28028},
}
An End-to-End Robust Point Cloud Semantic Segmentation Network with Single-Step Conditional Diffusion Models: Wentao Qu,

Jing Wang,

YongShun Gong,

Xiaoshui Huang,

Liang Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_CVPR,
  author    = {Qu, Wentao and Wang, Jing and Gong, YongShun and Huang, Xiaoshui and Xiao, Liang},
  title     = {An End-to-End Robust Point Cloud Semantic Segmentation Network with Single-Step Conditional Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27325--27335},
}
PCDreamer: Point Cloud Completion Through Multi-view Diffusion Priors: Guangshun Wei,

Yuan Feng,

Long Ma,

Chen Wang,

Yuanfeng Zhou,

Changjian Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2025_CVPR,
  author    = {Wei, Guangshun and Feng, Yuan and Ma, Long and Wang, Chen and Zhou, Yuanfeng and Li, Changjian},
  title     = {{PCDreamer}: Point Cloud Completion Through Multi-view Diffusion Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27243--27253},
}
Your ViT is Secretly an Image Segmentation Model: Tommie Kerssies,

Niccolò Cavagnero,

Alexander Hermans,

Narges Norouzi,

Giuseppe Averta,

Bastian Leibe,

Gijs Dubbelman,

Daan de Geus; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kerssies_2025_CVPR,
  author    = {Kerssies, Tommie and Cavagnero, Niccol{\`o} and Hermans, Alexander and Norouzi, Narges and Averta, Giuseppe and Leibe, Bastian and Dubbelman, Gijs and de Geus, Daan},
  title     = {Your {ViT} is Secretly an Image Segmentation Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25303--25313},
}
Cross-Rejective Open-Set SAR Image Registration: Shasha Mao,

Shiming Lu,

Zhaolong Du,

Licheng Jiao,

Shuiping Gou,

Luntian Mou,

Xuequan Lu,

Lin Xiong,

Yimeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2025_CVPR,
  author    = {Mao, Shasha and Lu, Shiming and Du, Zhaolong and Jiao, Licheng and Gou, Shuiping and Mou, Luntian and Lu, Xuequan and Xiong, Lin and Zhang, Yimeng},
  title     = {Cross-Rejective Open-Set {SAR} Image Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23027--23036},
}
SplineGS: Robust Motion-Adaptive Spline for Real-Time Dynamic 3D Gaussians from Monocular Video: Jongmin Park,

Minh-Quan Viet Bui,

Juan Luis Gonzalez Bello,

Jaeho Moon,

Jihyong Oh,

Munchurl Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Jongmin and Bui, Minh-Quan Viet and Bello, Juan Luis Gonzalez and Moon, Jaeho and Oh, Jihyong and Kim, Munchurl},
  title     = {{SplineGS}: Robust Motion-Adaptive Spline for Real-Time Dynamic {3D} {Gaussians} from Monocular Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26866--26875},
}
Multi-modal Knowledge Distillation-based Human Trajectory Forecasting: Jaewoo Jeong,

Seohee Lee,

Daehee Park,

Giwon Lee,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_CVPR,
  author    = {Jeong, Jaewoo and Lee, Seohee and Park, Daehee and Lee, Giwon and Yoon, Kuk-Jin},
  title     = {Multi-modal Knowledge Distillation-based Human Trajectory Forecasting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24222--24233},
}
ShiftwiseConv: Small Convolutional Kernel with Large Kernel Effect: Dachong Li,

Li Li,

Zhuangzhuang Chen,

Jianqiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Dachong and Li, Li and Chen, Zhuangzhuang and Li, Jianqiang},
  title     = {{ShiftwiseConv}: Small Convolutional Kernel with Large Kernel Effect},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25281--25291},
}
Object-Shot Enhanced Grounding Network for Egocentric Video: Yisen Feng,

Haoyu Zhang,

Meng Liu,

Weili Guan,

Liqiang Nie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Yisen and Zhang, Haoyu and Liu, Meng and Guan, Weili and Nie, Liqiang},
  title     = {Object-Shot Enhanced Grounding Network for Egocentric Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24190--24200},
}
Ev-3DOD: Pushing the Temporal Boundaries of 3D Object Detection with Event Cameras: Hoonhee Cho,

Jae-Young Kang,

Youngho Kim,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Cho_2025_CVPR,
  author    = {Cho, Hoonhee and Kang, Jae-Young and Kim, Youngho and Yoon, Kuk-Jin},
  title     = {{Ev-3DOD}: Pushing the Temporal Boundaries of {3D} Object Detection with Event Cameras},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27197--27210},
}
Nearly Zero-Cost Protection Against Mimicry by Personalized Diffusion Models: Namhyuk Ahn,

KiYoon Yoo,

Wonhyuk Ahn,

Daesik Kim,

Seung-Hun Nam; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ahn_2025_CVPR,
  author    = {Ahn, Namhyuk and Yoo, KiYoon and Ahn, Wonhyuk and Kim, Daesik and Nam, Seung-Hun},
  title     = {Nearly Zero-Cost Protection Against Mimicry by Personalized Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28801--28810},
}
The Devil is in Temporal Token: High Quality Video Reasoning Segmentation: Sitong Gong,

Yunzhi Zhuge,

Lu Zhang,

Zongxin Yang,

Pingping Zhang,

Huchuan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2025_CVPR,
  author    = {Gong, Sitong and Zhuge, Yunzhi and Zhang, Lu and Yang, Zongxin and Zhang, Pingping and Lu, Huchuan},
  title     = {The Devil is in Temporal Token: High Quality Video Reasoning Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29183--29192},
}
LITA-GS: Illumination-Agnostic Novel View Synthesis via Reference-Free 3D Gaussian Splatting and Physical Priors: Han Zhou,

Wei Dong,

Jun Chen; [pdf]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Han and Dong, Wei and Chen, Jun},
  title     = {{LITA-GS}: Illumination-Agnostic Novel View Synthesis via Reference-Free {3D} {Gaussian} Splatting and Physical Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21580--21589},
}
T-FAKE: Synthesizing Thermal Images for Facial Landmarking: Philipp Flotho,

Moritz Piening,

Anna Kukleva,

Gabriele Steidl; [pdf] [supp]
[bibtex]
@InProceedings{Flotho_2025_CVPR,
  author    = {Flotho, Philipp and Piening, Moritz and Kukleva, Anna and Steidl, Gabriele},
  title     = {{T-FAKE}: Synthesizing Thermal Images for Facial Landmarking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26356--26366},
}
Multi-Resolution Pathology-Language Pre-training Model with Text-Guided Visual Representation: Shahad Albastaki,

Anabia Sohail,

Iyyakutti Iyappan Ganapathi,

Basit Alawode,

Asim Khan,

Sajid Javed,

Naoufel Werghi,

Mohammed Bennamoun,

Arif Mahmood; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Albastaki_2025_CVPR,
  author    = {Albastaki, Shahad and Sohail, Anabia and Ganapathi, Iyyakutti Iyappan and Alawode, Basit and Khan, Asim and Javed, Sajid and Werghi, Naoufel and Bennamoun, Mohammed and Mahmood, Arif},
  title     = {Multi-Resolution Pathology-Language Pre-training Model with Text-Guided Visual Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25907--25919},
}
PICD: Versatile Perceptual Image Compression with Diffusion Rendering: Tongda Xu,

Jiahao Li,

Bin Li,

Yan Wang,

Ya-Qin Zhang,

Yan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Tongda and Li, Jiahao and Li, Bin and Wang, Yan and Zhang, Ya-Qin and Lu, Yan},
  title     = {{PICD}: Versatile Perceptual Image Compression with Diffusion Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28436--28445},
}
VideoSPatS: Video SPatiotemporal Splines for Disentangled Occlusion, Appearance and Motion Modeling and Editing: Juan Luis Gonzalez,

Xu Yao,

Alex Whelan,

Kyle Olszewski,

Hyeongwoo Kim,

Pablo Garrido; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gonzalez_2025_CVPR,
  author    = {Gonzalez, Juan Luis and Yao, Xu and Whelan, Alex and Olszewski, Kyle and Kim, Hyeongwoo and Garrido, Pablo},
  title     = {{VideoSPatS}: Video {SPatiotemporal} Splines for Disentangled Occlusion, Appearance and Motion Modeling and Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22901--22910},
}
Six-CD: Benchmarking Concept Removals for Text-to-image Diffusion Models: Jie Ren,

Kangrui Chen,

Yingqian Cui,

Shenglai Zeng,

Hui Liu,

Yue Xing,

Jiliang Tang,

Lingjuan Lyu; [pdf] [supp]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Jie and Chen, Kangrui and Cui, Yingqian and Zeng, Shenglai and Liu, Hui and Xing, Yue and Tang, Jiliang and Lyu, Lingjuan},
  title     = {{Six-CD}: Benchmarking Concept Removals for Text-to-image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28769--28778},
}
Black-Box Forgery Attacks on Semantic Watermarks for Diffusion Models: Andreas Müller,

Denis Lukovnikov,

Jonas Thietke,

Asja Fischer,

Erwin Quiring; [pdf] [supp]
[bibtex]
@InProceedings{Muller_2025_CVPR,
  author    = {M{\"u}ller, Andreas and Lukovnikov, Denis and Thietke, Jonas and Fischer, Asja and Quiring, Erwin},
  title     = {Black-Box Forgery Attacks on Semantic Watermarks for Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20937--20946},
}
VidSeg: Training-free Video Semantic Segmentation based on Diffusion Models: Qian Wang,

Abdelrahman Eldesokey,

Mohit Mendiratta,

Fangneng Zhan,

Adam Kortylewski,

Christian Theobalt,

Peter Wonka; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Qian and Eldesokey, Abdelrahman and Mendiratta, Mohit and Zhan, Fangneng and Kortylewski, Adam and Theobalt, Christian and Wonka, Peter},
  title     = {{VidSeg}: Training-free Video Semantic Segmentation based on Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22985--22994},
}
PersonaBooth: Personalized Text-to-Motion Generation: Boeun Kim,

Hea In Jeong,

JungHoon Sung,

Yihua Cheng,

Jeongmin Lee,

Ju Yong Chang,

Sang-Il Choi,

Younggeun Choi,

Saim Shin,

Jungho Kim,

Hyung Jin Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Boeun and Jeong, Hea In and Sung, JungHoon and Cheng, Yihua and Lee, Jeongmin and Chang, Ju Yong and Choi, Sang-Il and Choi, Younggeun and Shin, Saim and Kim, Jungho and Chang, Hyung Jin},
  title     = {{PersonaBooth}: Personalized Text-to-Motion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22756--22765},
}
Star with Bilinear Mapping: Zelin Peng,

Yu Huang,

Zhengqin Xu,

Feilong Tang,

Ming Hu,

Xiaokang Yang,

Wei Shen; [pdf]
[bibtex]
@InProceedings{Peng_2025_CVPR,
  author    = {Peng, Zelin and Huang, Yu and Xu, Zhengqin and Tang, Feilong and Hu, Ming and Yang, Xiaokang and Shen, Wei},
  title     = {Star with Bilinear Mapping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25292--25302},
}
DIFIX3D+: Improving 3D Reconstructions with Single-Step Diffusion Models: Jay Zhangjie Wu,

Yuxuan Zhang,

Haithem Turki,

Xuanchi Ren,

Jun Gao,

Mike Zheng Shou,

Sanja Fidler,

Zan Gojcic,

Huan Ling; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Jay Zhangjie and Zhang, Yuxuan and Turki, Haithem and Ren, Xuanchi and Gao, Jun and Shou, Mike Zheng and Fidler, Sanja and Gojcic, Zan and Ling, Huan},
  title     = {{DIFIX3D+}: Improving {3D} Reconstructions with Single-Step Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26024--26035},
}
Time of the Flight of the Gaussians: Optimizing Depth Indirectly in Dynamic Radiance Fields: Runfeng Li,

Mikhail Okunev,

Zixuan Guo,

Anh Ha Duong,

Christian Richardt,

Matthew O'Toole,

James Tompkin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Runfeng and Okunev, Mikhail and Guo, Zixuan and Duong, Anh Ha and Richardt, Christian and O'Toole, Matthew and Tompkin, James},
  title     = {Time of the Flight of the {Gaussians}: Optimizing Depth Indirectly in Dynamic Radiance Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21021--21030},
}
Align3R: Aligned Monocular Depth Estimation for Dynamic Videos: Jiahao Lu,

Tianyu Huang,

Peng Li,

Zhiyang Dou,

Cheng Lin,

Zhiming Cui,

Zhen Dong,

Sai-Kit Yeung,

Wenping Wang,

Yuan Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Jiahao and Huang, Tianyu and Li, Peng and Dou, Zhiyang and Lin, Cheng and Cui, Zhiming and Dong, Zhen and Yeung, Sai-Kit and Wang, Wenping and Liu, Yuan},
  title     = {{Align3R}: Aligned Monocular Depth Estimation for Dynamic Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22820--22830},
}
Seek Common Ground While Reserving Differences: Semi-Supervised Image-Text Sentiment Recognition: Wuyou Xia,

Guoli Jia,

Sicheng Zhao,

Jufeng Yang; [pdf]
[bibtex]
@InProceedings{Xia_2025_CVPR,
  author    = {Xia, Wuyou and Jia, Guoli and Zhao, Sicheng and Yang, Jufeng},
  title     = {Seek Common Ground While Reserving Differences: Semi-Supervised Image-Text Sentiment Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29601--29611},
}
Anomize: Better Open Vocabulary Video Anomaly Detection: Fei Li,

Wenxuan Liu,

Jingjing Chen,

Ruixu Zhang,

Yuran Wang,

Xian Zhong,

Zheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Fei and Liu, Wenxuan and Chen, Jingjing and Zhang, Ruixu and Wang, Yuran and Zhong, Xian and Wang, Zheng},
  title     = {Anomize: Better Open Vocabulary Video Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29203--29212},
}
Efficient Diffusion as Low Light Enhancer: Guanzhou Lan,

Qianli Ma,

Yuqi Yang,

Zhigang Wang,

Dong Wang,

Xuelong Li,

Bin Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lan_2025_CVPR,
  author    = {Lan, Guanzhou and Ma, Qianli and Yang, Yuqi and Wang, Zhigang and Wang, Dong and Li, Xuelong and Zhao, Bin},
  title     = {Efficient Diffusion as Low Light Enhancer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21277--21286},
}
HyperNVD: Accelerating Neural Video Decomposition via Hypernetworks: Maria Pilligua,

Danna Xue,

Javier Vazquez-Corral; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pilligua_2025_CVPR,
  author    = {Pilligua, Maria and Xue, Danna and Vazquez-Corral, Javier},
  title     = {{HyperNVD}: Accelerating Neural Video Decomposition via Hypernetworks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22933--22942},
}
Instant Adversarial Purification with Adversarial Consistency Distillation: Chun Tong Lei,

Hon Ming Yam,

Zhongliang Guo,

Yifei Qian,

Chun Pong Lau; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2025_CVPR,
  author    = {Lei, Chun Tong and Yam, Hon Ming and Guo, Zhongliang and Qian, Yifei and Lau, Chun Pong},
  title     = {Instant Adversarial Purification with Adversarial Consistency Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24331--24340},
}
Feature Selection for Latent Factor Models: Rittwika Kansabanik,

Adrian Barbu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kansabanik_2025_CVPR,
  author    = {Kansabanik, Rittwika and Barbu, Adrian},
  title     = {Feature Selection for Latent Factor Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30742--30751},
}
Preserve or Modify? Context-Aware Evaluation for Balancing Preservation and Modification in Text-Guided Image Editing: Yoonjeon Kim,

Soohyun Ryu,

Yeonsung Jung,

Hyunkoo Lee,

Joowon Kim,

June Yong Yang,

Jaeryong Hwang,

Eunho Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Yoonjeon and Ryu, Soohyun and Jung, Yeonsung and Lee, Hyunkoo and Kim, Joowon and Yang, June Yong and Hwang, Jaeryong and Yang, Eunho},
  title     = {Preserve or Modify? Context-Aware Evaluation for Balancing Preservation and Modification in Text-Guided Image Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23474--23483},
}
Decoupling Training-Free Guided Diffusion by ADMM: Youyuan Zhang,

Zehua Liu,

Zenan Li,

Zhaoyu Li,

James J. Clark,

Xujie Si; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Youyuan and Liu, Zehua and Li, Zenan and Li, Zhaoyu and Clark, James J. and Si, Xujie},
  title     = {Decoupling Training-Free Guided Diffusion by {ADMM}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23292--23302},
}
SwiftEdit: Lightning Fast Text-Guided Image Editing via One-Step Diffusion: Trong-Tung Nguyen,

Quang Nguyen,

Khoi Nguyen,

Anh Tran,

Cuong Pham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2025_CVPR,
  author    = {Nguyen, Trong-Tung and Nguyen, Quang and Nguyen, Khoi and Tran, Anh and Pham, Cuong},
  title     = {{SwiftEdit}: Lightning Fast Text-Guided Image Editing via One-Step Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21492--21501},
}
Learning from Synchronization: Self-Supervised Uncalibrated Multi-View Person Association in Challenging Scenes: Keqi Chen,

Vinkle Srivastav,

Didier Mutter,

Nicolas Padoy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Keqi and Srivastav, Vinkle and Mutter, Didier and Padoy, Nicolas},
  title     = {Learning from Synchronization: Self-Supervised Uncalibrated Multi-View Person Association in Challenging Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24419--24428},
}
CLIP-driven Coarse-to-fine Semantic Guidance for Fine-grained Open-set Semi-supervised Learning: Xiaokun Li,

Yaping Huang,

Qingji Guan; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xiaokun and Huang, Yaping and Guan, Qingji},
  title     = {{CLIP-driven} Coarse-to-fine Semantic Guidance for Fine-grained Open-set Semi-supervised Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30312--30321},
}
A Simple Data Augmentation for Feature Distribution Skewed Federated Learning: Yunlu Yan,

Huazhu Fu,

Yuexiang Li,

Jinheng Xie,

Jun Ma,

Guang Yang,

Lei Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Yunlu and Fu, Huazhu and Li, Yuexiang and Xie, Jinheng and Ma, Jun and Yang, Guang and Zhu, Lei},
  title     = {A Simple Data Augmentation for Feature Distribution Skewed Federated Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25749--25758},
}
GLane3D: Detecting Lanes with Graph of 3D Keypoints: Halil İbrahim Öztürk,

Muhammet Esat Kalfaoğlu,

Ozsel Kilinc; [pdf] [supp]
[bibtex]
@InProceedings{Ozturk_2025_CVPR,
  author    = {{\"O}zt{\"u}rk, Halil {\.I}brahim and Kalfao{\u{g}}lu, Muhammet Esat and Kilinc, Ozsel},
  title     = {{GLane3D}: Detecting Lanes with Graph of {3D} Keypoints},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27508--27518},
}
Minimal Interaction Seperated Tuning: A New Paradigm for Visual Adaptation: Ningyuan Tang,

Minghao Fu,

Jianxin Wu; [pdf]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Ningyuan and Fu, Minghao and Wu, Jianxin},
  title     = {Minimal Interaction Seperated Tuning: A New Paradigm for Visual Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25208--25217},
}
Attraction Diminishing and Distributing for Few-Shot Class-Incremental Learning: Li-Jun Zhao,

Zhen-Duo Chen,

Yongxin Wang,

Xin Luo,

Xin-Shun Xu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Li-Jun and Chen, Zhen-Duo and Wang, Yongxin and Luo, Xin and Xu, Xin-Shun},
  title     = {Attraction Diminishing and Distributing for Few-Shot Class-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25657--25666},
}
4DTAM: Non-Rigid Tracking and Mapping via Dynamic Surface Gaussians: Hidenobu Matsuki,

Gwangbin Bae,

Andrew J. Davison; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Matsuki_2025_CVPR,
  author    = {Matsuki, Hidenobu and Bae, Gwangbin and Davison, Andrew J.},
  title     = {{4DTAM}: Non-Rigid Tracking and Mapping via Dynamic Surface {Gaussians}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26921--26932},
}
Unseen Visual Anomaly Generation: Han Sun,

Yunkang Cao,

Hao Dong,

Olga Fink; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Han and Cao, Yunkang and Dong, Hao and Fink, Olga},
  title     = {Unseen Visual Anomaly Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25508--25517},
}
T2ICount: Enhancing Cross-modal Understanding for Zero-Shot Counting: Yifei Qian,

Zhongliang Guo,

Bowen Deng,

Chun Tong Lei,

Shuai Zhao,

Chun Pong Lau,

Xiaopeng Hong,

Michael P. Pound; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qian_2025_CVPR,
  author    = {Qian, Yifei and Guo, Zhongliang and Deng, Bowen and Lei, Chun Tong and Zhao, Shuai and Lau, Chun Pong and Hong, Xiaopeng and Pound, Michael P.},
  title     = {{T2ICount}: Enhancing Cross-modal Understanding for Zero-Shot Counting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25336--25345},
}
ReNeg: Learning Negative Embedding with Reward Guidance: Xiaomin Li,

Yixuan Liu,

Takashi Isobe,

Xu Jia,

Qinpeng Cui,

Dong Zhou,

Dong Li,

You He,

Huchuan Lu,

Zhongdao Wang,

Emad Barsoum; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xiaomin and Liu, Yixuan and Isobe, Takashi and Jia, Xu and Cui, Qinpeng and Zhou, Dong and Li, Dong and He, You and Lu, Huchuan and Wang, Zhongdao and Barsoum, Emad},
  title     = {{ReNeg}: Learning Negative Embedding with Reward Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23636--23645},
}
MotionPro: A Precise Motion Controller for Image-to-Video Generation: Zhongwei Zhang,

Fuchen Long,

Zhaofan Qiu,

Yingwei Pan,

Wu Liu,

Ting Yao,

Tao Mei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhongwei and Long, Fuchen and Qiu, Zhaofan and Pan, Yingwei and Liu, Wu and Yao, Ting and Mei, Tao},
  title     = {{MotionPro}: A Precise Motion Controller for Image-to-Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27957--27967},
}
Goku: Flow Based Video Generative Foundation Models: Shoufa Chen,

Chongjian Ge,

Yuqi Zhang,

Yida Zhang,

Fengda Zhu,

Hao Yang,

Hongxiang Hao,

Hui Wu,

Zhichao Lai,

Yifei Hu,

Ting-Che Lin,

Shilong Zhang,

Fu Li,

Chuan Li,

Xing Wang,

Yanghua Peng,

Peize Sun,

Ping Luo,

Yi Jiang,

Zehuan Yuan,

Bingyue Peng,

Xiaobing Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Shoufa and Ge, Chongjian and Zhang, Yuqi and Zhang, Yida and Zhu, Fengda and Yang, Hao and Hao, Hongxiang and Wu, Hui and Lai, Zhichao and Hu, Yifei and Lin, Ting-Che and Zhang, Shilong and Li, Fu and Li, Chuan and Wang, Xing and Peng, Yanghua and Sun, Peize and Luo, Ping and Jiang, Yi and Yuan, Zehuan and Peng, Bingyue and Liu, Xiaobing},
  title     = {Goku: Flow Based Video Generative Foundation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23516--23527},
}
WISH: Weakly Supervised Instance Segmentation using Heterogeneous Labels: Hyeokjun Kweon,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Kweon_2025_CVPR,
  author    = {Kweon, Hyeokjun and Yoon, Kuk-Jin},
  title     = {{WISH}: Weakly Supervised Instance Segmentation using Heterogeneous Labels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25377--25387},
}
Good, Cheap, and Fast: Overfitted Image Compression with Wasserstein Distortion: Jona Ballé,

Luca Versari,

Emilien Dupont,

Hyunjik Kim,

Matthias Bauer; [pdf] [supp]
[bibtex]
@InProceedings{Balle_2025_CVPR,
  author    = {Ball{\'e}, Jona and Versari, Luca and Dupont, Emilien and Kim, Hyunjik and Bauer, Matthias},
  title     = {Good, Cheap, and Fast: Overfitted Image Compression with {Wasserstein} Distortion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23259--23268},
}
Period-LLM: Extending the Periodic Capability of Multimodal Large Language Model: Yuting Zhang,

Hao Lu,

Qingyong Hu,

Yin Wang,

Kaishen Yuan,

Xin Liu,

Kaishun Wu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yuting and Lu, Hao and Hu, Qingyong and Wang, Yin and Yuan, Kaishen and Liu, Xin and Wu, Kaishun},
  title     = {{Period-LLM}: Extending the Periodic Capability of Multimodal Large Language Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29237--29247},
}
V2X-R: Cooperative LiDAR-4D Radar Fusion with Denoising Diffusion for 3D Object Detection: Xun Huang,

Jinlong Wang,

Qiming Xia,

Siheng Chen,

Bisheng Yang,

Xin Li,

Cheng Wang,

Chenglu Wen; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Xun and Wang, Jinlong and Xia, Qiming and Chen, Siheng and Yang, Bisheng and Li, Xin and Wang, Cheng and Wen, Chenglu},
  title     = {{V2X-R}: Cooperative {LiDAR-4D} Radar Fusion with Denoising Diffusion for {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27390--27400},
}
TAROT: Towards Essentially Domain-Invariant Robustness with Theoretical Justification: Dongyoon Yang,

Jihu Lee,

Yongdai Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Dongyoon and Lee, Jihu and Kim, Yongdai},
  title     = {{TAROT}: Towards Essentially Domain-Invariant Robustness with Theoretical Justification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25780--25789},
}
Unveiling the Mist over 3D Vision-Language Understanding: Object-centric Evaluation with Chain-of-Analysis: Jiangyong Huang,

Baoxiong Jia,

Yan Wang,

Ziyu Zhu,

Xiongkun Linghu,

Qing Li,

Song-Chun Zhu,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Jiangyong and Jia, Baoxiong and Wang, Yan and Zhu, Ziyu and Linghu, Xiongkun and Li, Qing and Zhu, Song-Chun and Huang, Siyuan},
  title     = {Unveiling the Mist over {3D} Vision-Language Understanding: Object-centric Evaluation with Chain-of-Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24570--24581},
}
APT: Adaptive Personalized Training for Diffusion Models with Limited Data: JungWoo Chae,

Jiyoon Kim,

JaeWoong Choi,

Kyungyul Kim,

Sangheum Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chae_2025_CVPR,
  author    = {Chae, JungWoo and Kim, Jiyoon and Choi, JaeWoong and Kim, Kyungyul and Hwang, Sangheum},
  title     = {{APT}: Adaptive Personalized Training for Diffusion Models with Limited Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28619--28628},
}
SCAP: Transductive Test-Time Adaptation via Supportive Clique-based Attribute Prompting: Chenyu Zhang,

Kunlun Xu,

Zichen Liu,

Yuxin Peng,

Jiahuan Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Chenyu and Xu, Kunlun and Liu, Zichen and Peng, Yuxin and Zhou, Jiahuan},
  title     = {{SCAP}: Transductive Test-Time Adaptation via Supportive Clique-based Attribute Prompting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30032--30041},
}
Tracktention: Leveraging Point Tracking to Attend Videos Faster and Better: Zihang Lai,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Zihang and Vedaldi, Andrea},
  title     = {Tracktention: Leveraging Point Tracking to Attend Videos Faster and Better},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22809--22819},
}
DocVLM: Make Your VLM an Efficient Reader: Mor Shpigel Nacson,

Aviad Aberdam,

Roy Ganz,

Elad Ben Avraham,

Alona Golts,

Yair Kittenplon,

Shai Mazor,

Ron Litman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nacson_2025_CVPR,
  author    = {Nacson, Mor Shpigel and Aberdam, Aviad and Ganz, Roy and Ben Avraham, Elad and Golts, Alona and Kittenplon, Yair and Mazor, Shai and Litman, Ron},
  title     = {{DocVLM}: Make Your {VLM} an Efficient Reader},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29005--29015},
}
Revisiting Source-Free Domain Adaptation: Insights into Representativeness, Generalization, and Variety: Ronghang Zhu,

Mengxuan Hu,

Weiming Zhuang,

Lingjuan Lyu,

Xiang Yu,

Sheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Ronghang and Hu, Mengxuan and Zhuang, Weiming and Lyu, Lingjuan and Yu, Xiang and Li, Sheng},
  title     = {Revisiting Source-Free Domain Adaptation: Insights into Representativeness, Generalization, and Variety},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25688--25697},
}
Adaptive Unimodal Regulation for Balanced Multimodal Information Acquisition: Chengxiang Huang,

Yake Wei,

Zequn Yang,

Di Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Chengxiang and Wei, Yake and Yang, Zequn and Hu, Di},
  title     = {Adaptive Unimodal Regulation for Balanced Multimodal Information Acquisition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25854--25863},
}
FLARE: Feed-forward Geometry, Appearance and Camera Estimation from Uncalibrated Sparse Views: Shangzhan Zhang,

Jianyuan Wang,

Yinghao Xu,

Nan Xue,

Christian Rupprecht,

Xiaowei Zhou,

Yujun Shen,

Gordon Wetzstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Shangzhan and Wang, Jianyuan and Xu, Yinghao and Xue, Nan and Rupprecht, Christian and Zhou, Xiaowei and Shen, Yujun and Wetzstein, Gordon},
  title     = {{FLARE}: Feed-forward Geometry, Appearance and Camera Estimation from Uncalibrated Sparse Views},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21936--21947},
}
Improving Gaussian Splatting with Localized Points Management: Haosen Yang,

Chenhao Zhang,

Wenqing Wang,

Marco Volino,

Adrian Hilton,

Li Zhang,

Xiatian Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Haosen and Zhang, Chenhao and Wang, Wenqing and Volino, Marco and Hilton, Adrian and Zhang, Li and Zhu, Xiatian},
  title     = {Improving {Gaussian} Splatting with Localized Points Management},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21696--21705},
}
One-Way Ticket: Time-Independent Unified Encoder for Distilling Text-to-Image Diffusion Models: Senmao Li,

Lei Wang,

Kai Wang,

Tao Liu,

Jiehang Xie,

Joost van de Weijer,

Fahad Shahbaz Khan,

Shiqi Yang,

Yaxing Wang,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Senmao and Wang, Lei and Wang, Kai and Liu, Tao and Xie, Jiehang and van de Weijer, Joost and Khan, Fahad Shahbaz and Yang, Shiqi and Wang, Yaxing and Yang, Jian},
  title     = {One-Way Ticket: Time-Independent Unified Encoder for Distilling Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23563--23574},
}
Domain Adaptive Diabetic Retinopathy Grading with Model Absence and Flowing Data: Wenxin Su,

Song Tang,

Xiaofeng Liu,

Xiaojing Yi,

Mao Ye,

Chunxiao Zu,

Jiahao Li,

Xiatian Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2025_CVPR,
  author    = {Su, Wenxin and Tang, Song and Liu, Xiaofeng and Yi, Xiaojing and Ye, Mao and Zu, Chunxiao and Li, Jiahao and Zhu, Xiatian},
  title     = {Domain Adaptive Diabetic Retinopathy Grading with Model Absence and Flowing Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28337--28346},
}
LoRASculpt: Sculpting LoRA for Harmonizing General and Specialized Knowledge in Multimodal Large Language Models: Jian Liang,

Wenke Huang,

Guancheng Wan,

Qu Yang,

Mang Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Jian and Huang, Wenke and Wan, Guancheng and Yang, Qu and Ye, Mang},
  title     = {{LoRASculpt}: Sculpting {LoRA} for Harmonizing General and Specialized Knowledge in Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26170--26180},
}
SEAL: Semantic Attention Learning for Long Video Representation: Lan Wang,

Yujia Chen,

Du Tran,

Vishnu Naresh Boddeti,

Wen-Sheng Chu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Lan and Chen, Yujia and Tran, Du and Boddeti, Vishnu Naresh and Chu, Wen-Sheng},
  title     = {{SEAL}: Semantic Attention Learning for Long Video Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26192--26201},
}
SCFlow2: Plug-and-Play Object Pose Refiner with Shape-Constraint Scene Flow: Qingyuan Wang,

Rui Song,

Jiaojiao Li,

Kerui Cheng,

David Ferstl,

Yinlin Hu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Qingyuan and Song, Rui and Li, Jiaojiao and Cheng, Kerui and Ferstl, David and Hu, Yinlin},
  title     = {{SCFlow2}: Plug-and-Play Object Pose Refiner with Shape-Constraint Scene Flow},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22045--22054},
}
FlipSketch: Flipping Static Drawings to Text-Guided Sketch Animations: Hmrishav Bandyopadhyay,

Yi-Zhe Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bandyopadhyay_2025_CVPR,
  author    = {Bandyopadhyay, Hmrishav and Song, Yi-Zhe},
  title     = {{FlipSketch}: Flipping Static Drawings to Text-Guided Sketch Animations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28394--28404},
}
SketchAgent: Language-Driven Sequential Sketch Generation: Yael Vinker,

Tamar Rott Shaham,

Kristine Zheng,

Alex Zhao,

Judith E Fan,

Antonio Torralba; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vinker_2025_CVPR,
  author    = {Vinker, Yael and Shaham, Tamar Rott and Zheng, Kristine and Zhao, Alex and Fan, Judith E. and Torralba, Antonio},
  title     = {{SketchAgent}: Language-Driven Sequential Sketch Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23355--23368},
}
DRAWER: Digital Reconstruction and Articulation With Environment Realism: Hongchi Xia,

Entong Su,

Marius Memmel,

Arhan Jain,

Raymond Yu,

Numfor Mbiziwo-Tiapo,

Ali Farhadi,

Abhishek Gupta,

Shenlong Wang,

Wei-Chiu Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_CVPR,
  author    = {Xia, Hongchi and Su, Entong and Memmel, Marius and Jain, Arhan and Yu, Raymond and Mbiziwo-Tiapo, Numfor and Farhadi, Ali and Gupta, Abhishek and Wang, Shenlong and Ma, Wei-Chiu},
  title     = {{DRAWER}: Digital Reconstruction and Articulation With Environment Realism},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21771--21782},
}
GoLF-NRT: Integrating Global Context and Local Geometry for Few-Shot View Synthesis: You Wang,

Li Fang,

Hao Zhu,

Fei Hu,

Long Ye,

Zhan Ma; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, You and Fang, Li and Zhu, Hao and Hu, Fei and Ye, Long and Ma, Zhan},
  title     = {{GoLF-NRT}: Integrating Global Context and Local Geometry for Few-Shot View Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21349--21359},
}
Deep Change Monitoring: A Hyperbolic Representative Learning Framework and a Dataset for Long-term Fine-grained Tree Change Detection: Yante Li,

Hanwen Qi,

Haoyu Chen,

Xinlian Liang,

Guoying Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yante and Qi, Hanwen and Chen, Haoyu and Liang, Xinlian and Zhao, Guoying},
  title     = {Deep Change Monitoring: A Hyperbolic Representative Learning Framework and a Dataset for Long-term Fine-grained Tree Change Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27346--27356},
}
ITA-MDT: Image-Timestep-Adaptive Masked Diffusion Transformer Framework for Image-Based Virtual Try-On: Ji Woo Hong,

Tri Ton,

Trung X. Pham,

Gwanhyeong Koo,

Sunjae Yoon,

Chang D. Yoo; [pdf] [supp]
[bibtex]
@InProceedings{Hong_2025_CVPR,
  author    = {Hong, Ji Woo and Ton, Tri and Pham, Trung X. and Koo, Gwanhyeong and Yoon, Sunjae and Yoo, Chang D.},
  title     = {{ITA-MDT}: Image-Timestep-Adaptive Masked Diffusion Transformer Framework for Image-Based Virtual Try-On},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28284--28294},
}
MultiVENT 2.0: A Massive Multilingual Benchmark for Event-Centric Video Retrieval: Reno Kriz,

Kate Sanders,

David Etter,

Kenton Murray,

Cameron Carpenter,

Hannah Recknor,

Jimena Guallar-Blasco,

Alexander Martin,

Eugene Yang,

Benjamin Van Durme; [pdf] [supp]
[bibtex]
@InProceedings{Kriz_2025_CVPR,
  author    = {Kriz, Reno and Sanders, Kate and Etter, David and Murray, Kenton and Carpenter, Cameron and Recknor, Hannah and Guallar-Blasco, Jimena and Martin, Alexander and Yang, Eugene and Van Durme, Benjamin},
  title     = {{MultiVENT} 2.0: A Massive Multilingual Benchmark for Event-Centric Video Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24149--24158},
}
VolFormer: Explore More Comprehensive Cube Interaction for Hyperspectral Image Restoration and Beyond: Dabing Yu,

Zheng Gao; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Dabing and Gao, Zheng},
  title     = {{VolFormer}: Explore More Comprehensive Cube Interaction for Hyperspectral Image Restoration and Beyond},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28091--28101},
}
BizGen: Advancing Article-level Visual Text Rendering for Infographics Generation: Yuyang Peng,

Shishi Xiao,

Keming Wu,

Qisheng Liao,

Bohan Chen,

Kevin Lin,

Danqing Huang,

Ji Li,

Yuhui Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2025_CVPR,
  author    = {Peng, Yuyang and Xiao, Shishi and Wu, Keming and Liao, Qisheng and Chen, Bohan and Lin, Kevin and Huang, Danqing and Li, Ji and Yuan, Yuhui},
  title     = {{BizGen}: Advancing Article-level Visual Text Rendering for Infographics Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23615--23624},
}
SmartCLIP: Modular Vision-language Alignment with Identification Guarantees: Shaoan Xie,

Lingjing Kong,

Yujia Zheng,

Yu Yao,

Zeyu Tang,

Eric P. Xing,

Guangyi Chen,

Kun Zhang; [pdf]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Shaoan and Kong, Lingjing and Zheng, Yujia and Yao, Yu and Tang, Zeyu and Xing, Eric P. and Chen, Guangyi and Zhang, Kun},
  title     = {{SmartCLIP}: Modular Vision-language Alignment with Identification Guarantees},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29780--29790},
}
Q-DiT: Accurate Post-Training Quantization for Diffusion Transformers: Lei Chen,

Yuan Meng,

Chen Tang,

Xinzhu Ma,

Jingyan Jiang,

Xin Wang,

Zhi Wang,

Wenwu Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Lei and Meng, Yuan and Tang, Chen and Ma, Xinzhu and Jiang, Jingyan and Wang, Xin and Wang, Zhi and Zhu, Wenwu},
  title     = {{Q-DiT}: Accurate Post-Training Quantization for Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28306--28315},
}
RoboGround: Robotic Manipulation with Grounded Vision-Language Priors: Haifeng Huang,

Xinyi Chen,

Yilun Chen,

Hao Li,

Xiaoshen Han,

Zehan Wang,

Tai Wang,

Jiangmiao Pang,

Zhou Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Haifeng and Chen, Xinyi and Chen, Yilun and Li, Hao and Han, Xiaoshen and Wang, Zehan and Wang, Tai and Pang, Jiangmiao and Zhao, Zhou},
  title     = {{RoboGround}: Robotic Manipulation with Grounded Vision-Language Priors},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22540--22550},
}
Improving Transferable Targeted Attacks with Feature Tuning Mixup: Kaisheng Liang,

Xuelong Dai,

Yanjie Li,

Dong Wang,

Bin Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Kaisheng and Dai, Xuelong and Li, Yanjie and Wang, Dong and Xiao, Bin},
  title     = {Improving Transferable Targeted Attacks with Feature Tuning Mixup},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25802--25811},
}
DrivingSphere: Building a High-fidelity 4D World for Closed-loop Simulation: Tianyi Yan,

Dongming Wu,

Wencheng Han,

Junpeng Jiang,

Xia Zhou,

Kun Zhan,

Cheng-zhong Xu,

Jianbing Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Tianyi and Wu, Dongming and Han, Wencheng and Jiang, Junpeng and Zhou, Xia and Zhan, Kun and Xu, Cheng-zhong and Shen, Jianbing},
  title     = {{DrivingSphere}: Building a High-fidelity {4D} World for Closed-loop Simulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27531--27541},
}
HuPerFlow: A Comprehensive Benchmark for Human vs. Machine Motion Estimation Comparison: Yung-Hao Yang,

Zitang Sun,

Taiki Fukiage,

Shin'ya Nishida; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Yung-Hao and Sun, Zitang and Fukiage, Taiki and Nishida, Shin'ya},
  title     = {{HuPerFlow}: A Comprehensive Benchmark for Human vs. Machine Motion Estimation Comparison},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22799--22808},
}
MetaWriter: Personalized Handwritten Text Recognition Using Meta-Learned Prompt Tuning: Wenhao Gu,

Li Gu,

Chingyee Yee Suen,

Yang Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Gu_2025_CVPR,
  author    = {Gu, Wenhao and Gu, Li and Suen, Chingyee Yee and Wang, Yang},
  title     = {{MetaWriter}: Personalized Handwritten Text Recognition Using Meta-Learned Prompt Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23494--23504},
}
Subnet-Aware Dynamic Supernet Training for Neural Architecture Search: Jeimin Jeon,

Youngmin Oh,

Junghyup Lee,

Donghyeon Baek,

Dohyung Kim,

Chanho Eom,

Bumsub Ham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeon_2025_CVPR,
  author    = {Jeon, Jeimin and Oh, Youngmin and Lee, Junghyup and Baek, Donghyeon and Kim, Dohyung and Eom, Chanho and Ham, Bumsub},
  title     = {Subnet-Aware Dynamic Supernet Training for Neural Architecture Search},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30137--30146},
}
EchoWorld: Learning Motion-Aware World Models for Echocardiography Probe Guidance: Yang Yue,

Yulin Wang,

Haojun Jiang,

Pan Liu,

Shiji Song,

Gao Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yue_2025_CVPR,
  author    = {Yue, Yang and Wang, Yulin and Jiang, Haojun and Liu, Pan and Song, Shiji and Huang, Gao},
  title     = {{EchoWorld}: Learning Motion-Aware World Models for Echocardiography Probe Guidance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25993--26003},
}
Controllable Human Image Generation with Personalized Multi-Garments: Yisol Choi,

Sangkyung Kwak,

Sihyun Yu,

Hyungwon Choi,

Jinwoo Shin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2025_CVPR,
  author    = {Choi, Yisol and Kwak, Sangkyung and Yu, Sihyun and Choi, Hyungwon and Shin, Jinwoo},
  title     = {Controllable Human Image Generation with Personalized Multi-Garments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28736--28747},
}
UHD-processer: Unified UHD Image Restoration with Progressive Frequency Learning and Degradation-aware Prompts: Yidi Liu,

Dong Li,

Xueyang Fu,

Xin Lu,

Jie Huang,

Zheng-Jun Zha; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yidi and Li, Dong and Fu, Xueyang and Lu, Xin and Huang, Jie and Zha, Zheng-Jun},
  title     = {{UHD-processer}: Unified {UHD} Image Restoration with Progressive Frequency Learning and Degradation-aware Prompts},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23121--23130},
}
GBC-Splat: Generalizable Gaussian-Based Clothed Human Digitalization under Sparse RGB Cameras: Hanzhang Tu,

Zhanfeng Liao,

Boyao Zhou,

Shunyuan Zheng,

Xilong Zhou,

Liuxin Zhang,

QianYing Wang,

Yebin Liu; [pdf] [supp]
[bibtex]
@InProceedings{Tu_2025_CVPR,
  author    = {Tu, Hanzhang and Liao, Zhanfeng and Zhou, Boyao and Zheng, Shunyuan and Zhou, Xilong and Zhang, Liuxin and Wang, QianYing and Liu, Yebin},
  title     = {{GBC-Splat}: Generalizable {Gaussian}-Based Clothed Human Digitalization under Sparse {RGB} Cameras},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26377--26387},
}
AC3D: Analyzing and Improving 3D Camera Control in Video Diffusion Transformers: Sherwin Bahmani,

Ivan Skorokhodov,

Guocheng Qian,

Aliaksandr Siarohin,

Willi Menapace,

Andrea Tagliasacchi,

David B. Lindell,

Sergey Tulyakov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bahmani_2025_CVPR,
  author    = {Bahmani, Sherwin and Skorokhodov, Ivan and Qian, Guocheng and Siarohin, Aliaksandr and Menapace, Willi and Tagliasacchi, Andrea and Lindell, David B. and Tulyakov, Sergey},
  title     = {{AC3D}: Analyzing and Improving {3D} Camera Control in Video Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22875--22889},
}
A Unified Model for Compressed Sensing MRI Across Undersampling Patterns: Armeet Singh Jatyani,

Jiayun Wang,

Aditi Chandrashekar,

Zihui Wu,

Miguel Liu-Schiaffini,

Bahareh Tolooshams,

Anima Anandkumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jatyani_2025_CVPR,
  author    = {Jatyani, Armeet Singh and Wang, Jiayun and Chandrashekar, Aditi and Wu, Zihui and Liu-Schiaffini, Miguel and Tolooshams, Bahareh and Anandkumar, Anima},
  title     = {A Unified Model for Compressed Sensing {MRI} Across Undersampling Patterns},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26004--26013},
}
TSD-SR: One-Step Diffusion with Target Score Distillation for Real-World Image Super-Resolution: Linwei Dong,

Qingnan Fan,

Yihong Guo,

Zhonghao Wang,

Qi Zhang,

Jinwei Chen,

Yawei Luo,

Changqing Zou; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Linwei and Fan, Qingnan and Guo, Yihong and Wang, Zhonghao and Zhang, Qi and Chen, Jinwei and Luo, Yawei and Zou, Changqing},
  title     = {{TSD-SR}: One-Step Diffusion with Target Score Distillation for Real-World Image Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23174--23184},
}
Fast3R: Towards 3D Reconstruction of 1000+ Images in One Forward Pass: Jianing Yang,

Alexander Sax,

Kevin J. Liang,

Mikael Henaff,

Hao Tang,

Ang Cao,

Joyce Chai,

Franziska Meier,

Matt Feiszli; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jianing and Sax, Alexander and Liang, Kevin J. and Henaff, Mikael and Tang, Hao and Cao, Ang and Chai, Joyce and Meier, Franziska and Feiszli, Matt},
  title     = {{Fast3R}: Towards {3D} Reconstruction of 1000+ Images in One Forward Pass},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21924--21935},
}
StyleStudio: Text-Driven Style Transfer with Selective Control of Style Elements: Mingkun Lei,

Xue Song,

Beier Zhu,

Hao Wang,

Chi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2025_CVPR,
  author    = {Lei, Mingkun and Song, Xue and Zhu, Beier and Wang, Hao and Zhang, Chi},
  title     = {{StyleStudio}: Text-Driven Style Transfer with Selective Control of Style Elements},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23443--23452},
}
CTRL-O: Language-Controllable Object-Centric Visual Representation Learning: Aniket Didolkar,

Andrii Zadaianchuk,

Rabiul Awal,

Maximilian Seitzer,

Efstratios Gavves,

Aishwarya Agrawal; [pdf] [supp]
[bibtex]
@InProceedings{Didolkar_2025_CVPR,
  author    = {Didolkar, Aniket and Zadaianchuk, Andrii and Awal, Rabiul and Seitzer, Maximilian and Gavves, Efstratios and Agrawal, Aishwarya},
  title     = {{CTRL-O}: Language-Controllable Object-Centric Visual Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29523--29533},
}
Text Augmented Correlation Transformer For Few-shot Classification & Segmentation: Srinivasa Rao Nandam,

Sara Atito,

Zhenhua Feng,

Josef Kittler,

Muhammad Awais; [pdf] [supp]
[bibtex]
@InProceedings{Nandam_2025_CVPR,
  author    = {Nandam, Srinivasa Rao and Atito, Sara and Feng, Zhenhua and Kittler, Josef and Awais, Muhammad},
  title     = {Text Augmented Correlation Transformer For Few-shot Classification \& Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25357--25366},
}
Unified Dense Prediction of Video Diffusion: Lehan Yang,

Lu Qi,

Xiangtai Li,

Sheng Li,

Varun Jampani,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Lehan and Qi, Lu and Li, Xiangtai and Li, Sheng and Jampani, Varun and Yang, Ming-Hsuan},
  title     = {Unified Dense Prediction of Video Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28963--28973},
}
Towards Million-Scale Adversarial Robustness Evaluation With Stronger Individual Attacks: Yong Xie,

Weijie Zheng,

Hanxun Huang,

Guangnan Ye,

Xingjun Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Yong and Zheng, Weijie and Huang, Hanxun and Ye, Guangnan and Ma, Xingjun},
  title     = {Towards Million-Scale Adversarial Robustness Evaluation With Stronger Individual Attacks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30702--30711},
}
Temporal Action Detection Model Compression by Progressive Block Drop: Xiaoyong Chen,

Yong Guo,

Jiaming Liang,

Sitong Zhuang,

Runhao Zeng,

Xiping Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Xiaoyong and Guo, Yong and Liang, Jiaming and Zhuang, Sitong and Zeng, Runhao and Hu, Xiping},
  title     = {Temporal Action Detection Model Compression by Progressive Block Drop},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29225--29236},
}
Pursuing Temporal-Consistent Video Virtual Try-On via Dynamic Pose Interaction: Dong Li,

Wenqi Zhong,

Wei Yu,

Yingwei Pan,

Dingwen Zhang,

Ting Yao,

Junwei Han,

Tao Mei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Dong and Zhong, Wenqi and Yu, Wei and Pan, Yingwei and Zhang, Dingwen and Yao, Ting and Han, Junwei and Mei, Tao},
  title     = {Pursuing Temporal-Consistent Video Virtual Try-On via Dynamic Pose Interaction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22648--22657},
}
DINOv2 Meets Text: A Unified Framework for Image- and Pixel-Level Vision-Language Alignment: Cijo Jose,

Théo Moutakanni,

Dahyun Kang,

Federico Baldassarre,

Timothée Darcet,

Hu Xu,

Daniel Li,

Marc Szafraniec,

Michaël Ramamonjisoa,

Maxime Oquab,

Oriane Siméoni,

Huy V. Vo,

Patrick Labatut,

Piotr Bojanowski; [pdf] [supp]
[bibtex]
@InProceedings{Jose_2025_CVPR,
  author    = {Jose, Cijo and Moutakanni, Th{\'e}o and Kang, Dahyun and Baldassarre, Federico and Darcet, Timoth{\'e}e and Xu, Hu and Li, Daniel and Szafraniec, Marc and Ramamonjisoa, Micha{\"e}l and Oquab, Maxime and Sim{\'e}oni, Oriane and Vo, Huy V. and Labatut, Patrick and Bojanowski, Piotr},
  title     = {{DINOv2} Meets Text: A Unified Framework for Image- and Pixel-Level Vision-Language Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24905--24916},
}
Learning Affine Correspondences by Integrating Geometric Constraints: Pengju Sun,

Banglei Guan,

Zhenbao Yu,

Yang Shang,

Qifeng Yu,

Daniel Barath; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Pengju and Guan, Banglei and Yu, Zhenbao and Shang, Yang and Yu, Qifeng and Barath, Daniel},
  title     = {Learning Affine Correspondences by Integrating Geometric Constraints},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27038--27048},
}
UCOD-DPL: Unsupervised Camouflaged Object Detection via Dynamic Pseudo-label Learning: Weiqi Yan,

Lvhai Chen,

Huaijia Kou,

Shengchuan Zhang,

Yan Zhang,

Liujuan Cao; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Weiqi and Chen, Lvhai and Kou, Huaijia and Zhang, Shengchuan and Zhang, Yan and Cao, Liujuan},
  title     = {{UCOD-DPL}: Unsupervised Camouflaged Object Detection via Dynamic Pseudo-label Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30365--30375},
}
Geometry in Style: 3D Stylization via Surface Normal Deformation: Nam Anh Dinh,

Itai Lang,

Hyunwoo Kim,

Oded Stein,

Rana Hanocka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dinh_2025_CVPR,
  author    = {Dinh, Nam Anh and Lang, Itai and Kim, Hyunwoo and Stein, Oded and Hanocka, Rana},
  title     = {Geometry in Style: {3D} Stylization via Surface Normal Deformation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28456--28467},
}
PVC: Progressive Visual Token Compression for Unified Image and Video Processing in Large Vision-Language Models: Chenyu Yang,

Xuan Dong,

Xizhou Zhu,

Weijie Su,

Jiahao Wang,

Hao Tian,

Zhe Chen,

Wenhai Wang,

Lewei Lu,

Jifeng Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Chenyu and Dong, Xuan and Zhu, Xizhou and Su, Weijie and Wang, Jiahao and Tian, Hao and Chen, Zhe and Wang, Wenhai and Lu, Lewei and Dai, Jifeng},
  title     = {{PVC}: Progressive Visual Token Compression for Unified Image and Video Processing in Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24939--24949},
}
Multiple Object Tracking as ID Prediction: Ruopeng Gao,

Ji Qi,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Ruopeng and Qi, Ji and Wang, Limin},
  title     = {Multiple Object Tracking as {ID} Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27883--27893},
}
PIDLoc: Cross-View Pose Optimization Network Inspired by PID Controllers: Wooju Lee,

Juhye Park,

Dasol Hong,

Changki Sung,

Youngwoo Seo,

DongWan Kang,

Hyun Myung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Wooju and Park, Juhye and Hong, Dasol and Sung, Changki and Seo, Youngwoo and Kang, DongWan and Myung, Hyun},
  title     = {{PIDLoc}: Cross-View Pose Optimization Network Inspired by {PID} Controllers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21981--21990},
}
DreamOmni: Unified Image Generation and Editing: Bin Xia,

Yuechen Zhang,

Jingyao Li,

Chengyao Wang,

Yitong Wang,

Xinglong Wu,

Bei Yu,

Jiaya Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2025_CVPR,
  author    = {Xia, Bin and Zhang, Yuechen and Li, Jingyao and Wang, Chengyao and Wang, Yitong and Wu, Xinglong and Yu, Bei and Jia, Jiaya},
  title     = {{DreamOmni}: Unified Image Generation and Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28533--28543},
}
Hash3D: Training-free Acceleration for 3D Generation: Xingyi Yang,

Songhua Liu,

Xinchao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Xingyi and Liu, Songhua and Wang, Xinchao},
  title     = {{Hash3D}: Training-free Acceleration for {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21481--21491},
}
Learning Hazing to Dehazing: Towards Realistic Haze Generation for Real-World Image Dehazing: Ruiyi Wang,

Yushuo Zheng,

Zicheng Zhang,

Chunyi Li,

Shuaicheng Liu,

Guangtao Zhai,

Xiaohong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Ruiyi and Zheng, Yushuo and Zhang, Zicheng and Li, Chunyi and Liu, Shuaicheng and Zhai, Guangtao and Liu, Xiaohong},
  title     = {Learning Hazing to Dehazing: Towards Realistic Haze Generation for Real-World Image Dehazing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23091--23100},
}
RUBIK: A Structured Benchmark for Image Matching across Geometric Challenges: Thibaut Loiseau,

Guillaume Bourmaud; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Loiseau_2025_CVPR,
  author    = {Loiseau, Thibaut and Bourmaud, Guillaume},
  title     = {{RUBIK}: A Structured Benchmark for Image Matching across Geometric Challenges},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27070--27080},
}
Fast and Accurate Gigapixel Pathological Image Classification with Hierarchical Distillation Multi-Instance Learning: Jiuyang Dong,

Junjun Jiang,

Kui Jiang,

Jiahan Li,

Yongbing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Jiuyang and Jiang, Junjun and Jiang, Kui and Li, Jiahan and Zhang, Yongbing},
  title     = {Fast and Accurate Gigapixel Pathological Image Classification with Hierarchical Distillation Multi-Instance Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30818--30828},
}
IncEventGS: Pose-Free Gaussian Splatting from a Single Event Camera: Jian Huang,

Chengrui Dong,

Xuanhua Chen,

Peidong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Jian and Dong, Chengrui and Chen, Xuanhua and Liu, Peidong},
  title     = {{IncEventGS}: Pose-Free {Gaussian} Splatting from a Single Event Camera},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26933--26942},
}
OpenMIBOOD: Open Medical Imaging Benchmarks for Out-Of-Distribution Detection: Max Gutbrod,

David Rauber,

Danilo Weber Nunes,

Christoph Palm; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gutbrod_2025_CVPR,
  author    = {Gutbrod, Max and Rauber, David and Nunes, Danilo Weber and Palm, Christoph},
  title     = {{OpenMIBOOD}: Open Medical Imaging Benchmarks for Out-Of-Distribution Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25874--25886},
}
FactCheXcker: Mitigating Measurement Hallucinations in Chest X-ray Report Generation Models: Alice Heiman,

Xiaoman Zhang,

Emma Chen,

Sung Eun Kim,

Pranav Rajpurkar; [pdf] [supp]
[bibtex]
@InProceedings{Heiman_2025_CVPR,
  author    = {Heiman, Alice and Zhang, Xiaoman and Chen, Emma and Kim, Sung Eun and Rajpurkar, Pranav},
  title     = {{FactCheXcker}: Mitigating Measurement Hallucinations in Chest {X-ray} Report Generation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30787--30796},
}
When the Future Becomes the Past: Taming Temporal Correspondence for Self-supervised Video Representation Learning: Yang Liu,

Qianqian Xu,

Peisong Wen,

Siran Dai,

Qingming Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Yang and Xu, Qianqian and Wen, Peisong and Dai, Siran and Huang, Qingming},
  title     = {When the Future Becomes the Past: Taming Temporal Correspondence for Self-supervised Video Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24033--24044},
}
UniPose: A Unified Multimodal Framework for Human Pose Comprehension, Generation and Editing: Yiheng Li,

Ruibing Hou,

Hong Chang,

Shiguang Shan,

Xilin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yiheng and Hou, Ruibing and Chang, Hong and Shan, Shiguang and Chen, Xilin},
  title     = {{UniPose}: A Unified Multimodal Framework for Human Pose Comprehension, Generation and Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27805--27815},
}
POMP: Physics-consistent Motion Generative Model through Phase Manifolds: Bin Ji,

Ye Pan,

Zhimeng Liu,

Shuai Tan,

Xiaogang Jin,

Xiaokang Yang; [pdf] [supp]
[bibtex]
@InProceedings{Ji_2025_CVPR,
  author    = {Ji, Bin and Pan, Ye and Liu, Zhimeng and Tan, Shuai and Jin, Xiaogang and Yang, Xiaokang},
  title     = {{POMP}: Physics-consistent Motion Generative Model through Phase Manifolds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22690--22701},
}
Reasoning to Attend: Try to Understand How <SEG> Token Works: Rui Qian,

Xin Yin,

Dejing Dou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qian_2025_CVPR,
  author    = {Qian, Rui and Yin, Xin and Dou, Dejing},
  title     = {Reasoning to Attend: Try to Understand How \ensuremath{<}{SEG}\ensuremath{>} Token Works},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24722--24731},
}
ReSpec: Relevance and Specificity Grounded Online Filtering for Learning on Video-Text Data Streams: Chris Dongjoo Kim,

Jihwan Moon,

Sangwoo Moon,

Heeseung Yun,

Sihaeng Lee,

Aniruddha Kembhavi,

Soonyoung Lee,

Gunhee Kim,

Sangho Lee,

Christopher Clark; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Chris Dongjoo and Moon, Jihwan and Moon, Sangwoo and Yun, Heeseung and Lee, Sihaeng and Kembhavi, Aniruddha and Lee, Soonyoung and Kim, Gunhee and Lee, Sangho and Clark, Christopher},
  title     = {{ReSpec}: Relevance and Specificity Grounded Online Filtering for Learning on Video-Text Data Streams},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29040--29049},
}
DynRefer: Delving into Region-level Multimodal Tasks via Dynamic Resolution: Yuzhong Zhao,

Feng Liu,

Yue Liu,

Mingxiang Liao,

Chen Gong,

Qixiang Ye,

Fang Wan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Yuzhong and Liu, Feng and Liu, Yue and Liao, Mingxiang and Gong, Chen and Ye, Qixiang and Wan, Fang},
  title     = {{DynRefer}: Delving into Region-level Multimodal Tasks via Dynamic Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24742--24752},
}
Playing the Fool: Jailbreaking LLMs and Multimodal LLMs with Out-of-Distribution Strategy: Joonhyun Jeong,

Seyun Bae,

Yeonsung Jung,

Jaeryong Hwang,

Eunho Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_CVPR,
  author    = {Jeong, Joonhyun and Bae, Seyun and Jung, Yeonsung and Hwang, Jaeryong and Yang, Eunho},
  title     = {Playing the Fool: Jailbreaking {LLMs} and Multimodal {LLMs} with Out-of-Distribution Strategy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29937--29946},
}
VideoWorld: Exploring Knowledge Learning from Unlabeled Videos: Zhongwei Ren,

Yunchao Wei,

Xun Guo,

Yao Zhao,

Bingyi Kang,

Jiashi Feng,

Xiaojie Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Zhongwei and Wei, Yunchao and Guo, Xun and Zhao, Yao and Kang, Bingyi and Feng, Jiashi and Jin, Xiaojie},
  title     = {{VideoWorld}: Exploring Knowledge Learning from Unlabeled Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29029--29039},
}
3D-SLNR: A Super Lightweight Neural Representation for Large-scale 3D Mapping: Chenhui Shi,

Fulin Tang,

Ning An,

Yihong Wu; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Chenhui and Tang, Fulin and An, Ning and Wu, Yihong},
  title     = {{3D-SLNR}: A Super Lightweight Neural Representation for Large-scale {3D} Mapping},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27233--27242},
}
STINR: Deciphering Spatial Transcriptomics via Implicit Neural Representation: Yisi Luo,

Xile Zhao,

Kai Ye,

Deyu Meng; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Yisi and Zhao, Xile and Ye, Kai and Meng, Deyu},
  title     = {{STINR}: Deciphering Spatial Transcriptomics via Implicit Neural Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25930--25939},
}
RADIOv2.5: Improved Baselines for Agglomerative Vision Foundation Models: Greg Heinrich,

Mike Ranzinger,

Hongxu Yin,

Yao Lu,

Jan Kautz,

Andrew Tao,

Bryan Catanzaro,

Pavlo Molchanov; [pdf] [supp]
[bibtex]
@InProceedings{Heinrich_2025_CVPR,
  author    = {Heinrich, Greg and Ranzinger, Mike and Yin, Hongxu and Lu, Yao and Kautz, Jan and Tao, Andrew and Catanzaro, Bryan and Molchanov, Pavlo},
  title     = {{RADIOv2.5}: Improved Baselines for Agglomerative Vision Foundation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22487--22497},
}
Unsupervised Discovery of Facial Landmarks and Head Pose: Satyajit Tourani,

Siddharth Tourani,

Arif Mahmood,

Muhammad Haris Khan; [pdf] [supp]
[bibtex]
@InProceedings{Tourani_2025_CVPR,
  author    = {Tourani, Satyajit and Tourani, Siddharth and Mahmood, Arif and Khan, Muhammad Haris},
  title     = {Unsupervised Discovery of Facial Landmarks and Head Pose},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21192--21202},
}
Instruct-CLIP: Improving Instruction-Guided Image Editing with Automated Data Refinement Using Contrastive Learning: Sherry X. Chen,

Misha Sra,

Pradeep Sen; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Sherry X. and Sra, Misha and Sen, Pradeep},
  title     = {{Instruct-CLIP}: Improving Instruction-Guided Image Editing with Automated Data Refinement Using Contrastive Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28513--28522},
}
Stabilizing and Accelerating Autofocus with Expert Trajectory Regularized Deep Reinforcement Learning: Shouhang Zhu,

Chenglin Li,

Yuankun Jiang,

Li Wei,

Nuowen Kan,

Ziyang Zheng,

Wenrui Dai,

Junni Zou,

Hongkai Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Shouhang and Li, Chenglin and Jiang, Yuankun and Wei, Li and Kan, Nuowen and Zheng, Ziyang and Dai, Wenrui and Zou, Junni and Xiong, Hongkai},
  title     = {Stabilizing and Accelerating Autofocus with Expert Trajectory Regularized Deep Reinforcement Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26440--26450},
}
Repurposing Stable Diffusion Attention for Training-Free Unsupervised Interactive Segmentation: Markus Karmann,

Onay Urfalioglu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karmann_2025_CVPR,
  author    = {Karmann, Markus and Urfalioglu, Onay},
  title     = {Repurposing {Stable Diffusion} Attention for Training-Free Unsupervised Interactive Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24518--24528},
}
GO-N3RDet: Geometry Optimized NeRF-enhanced 3D Object Detector: Zechuan Li,

Hongshan Yu,

Yihao Ding,

Jinhao Qiao,

Basim Azam,

Naveed Akhtar; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Zechuan and Yu, Hongshan and Ding, Yihao and Qiao, Jinhao and Azam, Basim and Akhtar, Naveed},
  title     = {{GO-N3RDet}: Geometry Optimized {NeRF}-enhanced {3D} Object Detector},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27211--27221},
}
DPSeg: Dual-Prompt Cost Volume Learning for Open-Vocabulary Semantic Segmentation: Ziyu Zhao,

Xiaoguang Li,

Lingjia Shi,

Nasrin Imanpour,

Song Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Ziyu and Li, Xiaoguang and Shi, Lingjia and Imanpour, Nasrin and Wang, Song},
  title     = {{DPSeg}: Dual-Prompt Cost Volume Learning for Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25346--25356},
}
Simulator HC: Regression-based Online Simulation of Starting Problem-Solution Pairs for Homotopy Continuation in Geometric Vision: Xinyue Zhang,

Zijia Dai,

Wanting Xu,

Laurent Kneip; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xinyue and Dai, Zijia and Xu, Wanting and Kneip, Laurent},
  title     = {Simulator {HC}: Regression-based Online Simulation of Starting Problem-Solution Pairs for Homotopy Continuation in Geometric Vision},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27103--27112},
}
Dynamic Integration of Task-Specific Adapters for Class Incremental Learning: Jiashuo Li,

Shaokun Wang,

Bo Qian,

Yuhang He,

Xing Wei,

Qiang Wang,

Yihong Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jiashuo and Wang, Shaokun and Qian, Bo and He, Yuhang and Wei, Xing and Wang, Qiang and Gong, Yihong},
  title     = {Dynamic Integration of Task-Specific Adapters for Class Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30545--30555},
}
EgoPressure: A Dataset for Hand Pressure and Pose Estimation in Egocentric Vision: Yiming Zhao,

Taein Kwon,

Paul Streli,

Marc Pollefeys,

Christian Holz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Yiming and Kwon, Taein and Streli, Paul and Pollefeys, Marc and Holz, Christian},
  title     = {{EgoPressure}: A Dataset for Hand Pressure and Pose Estimation in Egocentric Vision},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27727--27738},
}
DiverseFlow: Sample-Efficient Diverse Mode Coverage in Flows: Mashrur M. Morshed,

Vishnu Boddeti; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Morshed_2025_CVPR,
  author    = {Morshed, Mashrur M. and Boddeti, Vishnu},
  title     = {{DiverseFlow}: Sample-Efficient Diverse Mode Coverage in Flows},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23303--23312},
}
MaskGWM: A Generalizable Driving World Model with Video Mask Reconstruction: Jingcheng Ni,

Yuxin Guo,

Yichen Liu,

Rui Chen,

Lewei Lu,

Zehuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2025_CVPR,
  author    = {Ni, Jingcheng and Guo, Yuxin and Liu, Yichen and Chen, Rui and Lu, Lewei and Wu, Zehuan},
  title     = {{MaskGWM}: A Generalizable Driving World Model with Video Mask Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22381--22391},
}
3D-MVP: 3D Multiview Pretraining for Manipulation: Shengyi Qian,

Kaichun Mo,

Valts Blukis,

David F. Fouhey,

Dieter Fox,

Ankit Goyal; [pdf]
[bibtex]
@InProceedings{Qian_2025_CVPR,
  author    = {Qian, Shengyi and Mo, Kaichun and Blukis, Valts and Fouhey, David F. and Fox, Dieter and Goyal, Ankit},
  title     = {{3D-MVP}: {3D} Multiview Pretraining for Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22530--22539},
}
Enhanced OoD Detection through Cross-Modal Alignment of Multi-Modal Representations: Jeonghyeon Kim,

Sangheum Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Jeonghyeon and Hwang, Sangheum},
  title     = {Enhanced {OoD} Detection through Cross-Modal Alignment of Multi-Modal Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29979--29988},
}
Mimir: Improving Video Diffusion Models for Precise Text Understanding: Shuai Tan,

Biao Gong,

Yutong Feng,

Kecheng Zheng,

Dandan Zheng,

Shuwei Shi,

Yujun Shen,

Jingdong Chen,

Ming Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2025_CVPR,
  author    = {Tan, Shuai and Gong, Biao and Feng, Yutong and Zheng, Kecheng and Zheng, Dandan and Shi, Shuwei and Shen, Yujun and Chen, Jingdong and Yang, Ming},
  title     = {Mimir: Improving Video Diffusion Models for Precise Text Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23978--23988},
}
UCM-VeID V2: A Richer Dataset and A Pre-training Method for UAV Cross-Modality Vehicle Re-Identification: Xingyue Liu,

Jiahao Qi,

Chen Chen,

KangCheng Bin,

Ping Zhong; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xingyue and Qi, Jiahao and Chen, Chen and Bin, KangCheng and Zhong, Ping},
  title     = {{UCM-VeID} {V2}: A Richer Dataset and A Pre-training Method for {UAV} Cross-Modality Vehicle Re-Identification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22286--22295},
}
GeoAvatar: Geometrically-Consistent Multi-Person Avatar Reconstruction from Sparse Multi-View Videos: Soohyun Lee,

Seoyeon Kim,

HeeKyung Lee,

Won-Sik Jeong,

Joo Ho Lee; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Soohyun and Kim, Seoyeon and Lee, HeeKyung and Jeong, Won-Sik and Lee, Joo Ho},
  title     = {{GeoAvatar}: Geometrically-Consistent Multi-Person Avatar Reconstruction from Sparse Multi-View Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21138--21147},
}
DiET-GS: Diffusion Prior and Event Stream-Assisted Motion Deblurring 3D Gaussian Splatting: Seungjun Lee,

Gim Hee Lee; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Seungjun and Lee, Gim Hee},
  title     = {{DiET-GS}: Diffusion Prior and Event Stream-Assisted Motion Deblurring {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21739--21749},
}
Speedy-Splat: Fast 3D Gaussian Splatting with Sparse Pixels and Sparse Primitives: Alex Hanson,

Allen Tu,

Geng Lin,

Vasu Singla,

Matthias Zwicker,

Tom Goldstein; [pdf] [supp]
[bibtex]
@InProceedings{Hanson_2025_CVPR,
  author    = {Hanson, Alex and Tu, Allen and Lin, Geng and Singla, Vasu and Zwicker, Matthias and Goldstein, Tom},
  title     = {{Speedy-Splat}: Fast {3D} {Gaussian} Splatting with Sparse Pixels and Sparse Primitives},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21537--21546},
}
Omnia de EgoTempo: Benchmarking Temporal Understanding of Multi-Modal LLMs in Egocentric Videos: Chiara Plizzari,

Alessio Tonioni,

Yongqin Xian,

Achin Kulshrestha,

Federico Tombari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Plizzari_2025_CVPR,
  author    = {Plizzari, Chiara and Tonioni, Alessio and Xian, Yongqin and Kulshrestha, Achin and Tombari, Federico},
  title     = {Omnia de {EgoTempo}: Benchmarking Temporal Understanding of Multi-Modal {LLMs} in Egocentric Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24129--24138},
}
ODHSR: Online Dense 3D Reconstruction of Humans and Scenes from Monocular Videos: Zetong Zhang,

Manuel Kaufmann,

Lixin Xue,

Jie Song,

Martin R. Oswald; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zetong and Kaufmann, Manuel and Xue, Lixin and Song, Jie and Oswald, Martin R.},
  title     = {{ODHSR}: Online Dense {3D} Reconstruction of Humans and Scenes from Monocular Videos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21824--21835},
}
SpiritSight Agent: Advanced GUI Agent with One Look: Zhiyuan Huang,

Ziming Cheng,

Junting Pan,

Zhaohui Hou,

Mingjie Zhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zhiyuan and Cheng, Ziming and Pan, Junting and Hou, Zhaohui and Zhan, Mingjie},
  title     = {{SpiritSight} Agent: Advanced {GUI} Agent with One Look},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29490--29500},
}
Zero-Shot Monocular Scene Flow Estimation in the Wild: Yiqing Liang,

Abhishek Badki,

Hang Su,

James Tompkin,

Orazio Gallo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Yiqing and Badki, Abhishek and Su, Hang and Tompkin, James and Gallo, Orazio},
  title     = {Zero-Shot Monocular Scene Flow Estimation in the Wild},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21031--21044},
}
MG-MotionLLM: A Unified Framework for Motion Comprehension and Generation across Multiple Granularities: Bizhu Wu,

Jinheng Xie,

Keming Shen,

Zhe Kong,

Jianfeng Ren,

Ruibin Bai,

Rong Qu,

Linlin Shen; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Bizhu and Xie, Jinheng and Shen, Keming and Kong, Zhe and Ren, Jianfeng and Bai, Ruibin and Qu, Rong and Shen, Linlin},
  title     = {{MG-MotionLLM}: A Unified Framework for Motion Comprehension and Generation across Multiple Granularities},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27849--27858},
}
Retaining Knowledge and Enhancing Long-Text Representations in CLIP through Dual-Teacher Distillation: Yuheng Feng,

Changsong Wen,

Zelin Peng,

Jiaye Li,

Siyu Zhu; [pdf]
[bibtex]
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Yuheng and Wen, Changsong and Peng, Zelin and Li, Jiaye and Zhu, Siyu},
  title     = {Retaining Knowledge and Enhancing Long-Text Representations in {CLIP} through Dual-Teacher Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24895--24904},
}
MMRL: Multi-Modal Representation Learning for Vision-Language Models: Yuncheng Guo,

Xiaodong Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Yuncheng and Gu, Xiaodong},
  title     = {{MMRL}: Multi-Modal Representation Learning for Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25015--25025},
}
Anchor-Aware Similarity Cohesion in Target Frames Enables Predicting Temporal Moment Boundaries in 2D: Jiawei Tan,

Hongxing Wang,

Junwu Weng,

Jiaxin Li,

Zhilong Ou,

Kang Dang; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2025_CVPR,
  author    = {Tan, Jiawei and Wang, Hongxing and Weng, Junwu and Li, Jiaxin and Ou, Zhilong and Dang, Kang},
  title     = {Anchor-Aware Similarity Cohesion in Target Frames Enables Predicting Temporal Moment Boundaries in {2D}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24180--24189},
}
Breaking the Low-Rank Dilemma of Linear Attention: Qihang Fan,

Huaibo Huang,

Ran He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Qihang and Huang, Huaibo and He, Ran},
  title     = {Breaking the Low-Rank Dilemma of Linear Attention},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25271--25280},
}
Embracing Collaboration Over Competition: Condensing Multiple Prompts for Visual In-Context Learning: Jinpeng Wang,

Tianci Luo,

Yaohua Zha,

Yan Feng,

Ruisheng Luo,

Bin Chen,

Tao Dai,

Long Chen,

Yaowei Wang,

Shu-Tao Xia; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jinpeng and Luo, Tianci and Zha, Yaohua and Feng, Yan and Luo, Ruisheng and Chen, Bin and Dai, Tao and Chen, Long and Wang, Yaowei and Xia, Shu-Tao},
  title     = {Embracing Collaboration Over Competition: Condensing Multiple Prompts for Visual In-Context Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25156--25165},
}
Unity in Diversity: Video Editing via Gradient-Latent Purification: Junyu Gao,

Kunlin Yang,

Xuan Yao,

Yufan Hu; [pdf]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Junyu and Yang, Kunlin and Yao, Xuan and Hu, Yufan},
  title     = {Unity in Diversity: Video Editing via Gradient-Latent Purification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23401--23411},
}
Revealing Key Details to See Differences: A Novel Prototypical Perspective for Skeleton-based Action Recognition: Hongda Liu,

Yunfan Liu,

Min Ren,

Hao Wang,

Yunlong Wang,

Zhenan Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Hongda and Liu, Yunfan and Ren, Min and Wang, Hao and Wang, Yunlong and Sun, Zhenan},
  title     = {Revealing Key Details to See Differences: A Novel Prototypical Perspective for Skeleton-based Action Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29248--29257},
}
Finsler Multi-Dimensional Scaling: Manifold Learning for Asymmetric Dimensionality Reduction and Embedding: Thomas Dagès,

Simon Weber,

Ya-Wei Eileen Lin,

Ronen Talmon,

Daniel Cremers,

Michael Lindenbaum,

Alfred M. Bruckstein,

Ron Kimmel; [pdf] [supp]
[bibtex]
@InProceedings{Dages_2025_CVPR,
  author    = {Dag{\`e}s, Thomas and Weber, Simon and Lin, Ya-Wei Eileen and Talmon, Ronen and Cremers, Daniel and Lindenbaum, Michael and Bruckstein, Alfred M. and Kimmel, Ron},
  title     = {{Finsler} Multi-Dimensional Scaling: Manifold Learning for Asymmetric Dimensionality Reduction and Embedding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25842--25853},
}
VideoEspresso: A Large-Scale Chain-of-Thought Dataset for Fine-Grained Video Reasoning via Core Frame Selection: Songhao Han,

Wei Huang,

Hairong Shi,

Le Zhuo,

Xiu Su,

Shifeng Zhang,

Xu Zhou,

Xiaojuan Qi,

Yue Liao,

Si Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Songhao and Huang, Wei and Shi, Hairong and Zhuo, Le and Su, Xiu and Zhang, Shifeng and Zhou, Xu and Qi, Xiaojuan and Liao, Yue and Liu, Si},
  title     = {{VideoEspresso}: A Large-Scale Chain-of-Thought Dataset for Fine-Grained Video Reasoning via Core Frame Selection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26181--26191},
}
Cross-Modal Distillation for 2D/3D Multi-Object Discovery from 2D Motion: Saad Lahlali,

Sandra Kara,

Hejer Ammar,

Florian Chabot,

Nicolas Granger,

Hervé Le Borgne,

Quoc-Cuong Pham; [pdf] [supp]
[bibtex]
@InProceedings{Lahlali_2025_CVPR,
  author    = {Lahlali, Saad and Kara, Sandra and Ammar, Hejer and Chabot, Florian and Granger, Nicolas and Le Borgne, Herv{\'e} and Pham, Quoc-Cuong},
  title     = {Cross-Modal Distillation for {2D/3D} Multi-Object Discovery from {2D} Motion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24529--24538},
}
Bridge Frame and Event: Common Spatiotemporal Fusion for High-Dynamic Scene Optical Flow: Hanyu Zhou,

Haonan Wang,

Haoyue Liu,

Yuxing Duan,

Yi Chang,

Luxin Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Hanyu and Wang, Haonan and Liu, Haoyue and Duan, Yuxing and Chang, Yi and Yan, Luxin},
  title     = {Bridge Frame and Event: Common Spatiotemporal Fusion for High-Dynamic Scene Optical Flow},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27904--27913},
}
Inversion Circle Interpolation: Diffusion-based Image Augmentation for Data-scarce Classification: Yanghao Wang,

Long Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yanghao and Chen, Long},
  title     = {Inversion Circle Interpolation: Diffusion-based Image Augmentation for Data-scarce Classification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25560--25569},
}
TSP-Mamba: The Travelling Salesman Problem Meets Mamba for Image Super-resolution and Beyond: Kun Zhou,

Xinyu Lin,

Jiangbo Lu; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Kun and Lin, Xinyu and Lu, Jiangbo},
  title     = {{TSP-Mamba}: The Travelling Salesman Problem Meets {Mamba} for Image Super-resolution and Beyond},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28134--28143},
}
RENO: Real-Time Neural Compression for 3D LiDAR Point Clouds: Kang You,

Tong Chen,

Dandan Ding,

M. Salman Asif,

Zhan Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2025_CVPR,
  author    = {You, Kang and Chen, Tong and Ding, Dandan and Asif, M. Salman and Ma, Zhan},
  title     = {{RENO}: Real-Time Neural Compression for {3D} {LiDAR} Point Clouds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22172--22181},
}
FADE: Frequency-Aware Diffusion Model Factorization for Video Editing: Yixuan Zhu,

Haolin Wang,

Shilin Ma,

Wenliang Zhao,

Yansong Tang,

Lei Chen,

Jie Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Yixuan and Wang, Haolin and Ma, Shilin and Zhao, Wenliang and Tang, Yansong and Chen, Lei and Zhou, Jie},
  title     = {{FADE}: Frequency-Aware Diffusion Model Factorization for Video Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28426--28435},
}
Data Synthesis with Diverse Styles for Face Recognition via 3DMM-Guided Diffusion: Yuxi Mi,

Zhizhou Zhong,

Yuge Huang,

Qiuyang Yuan,

Xuan Zhao,

Jianqing Xu,

Shouhong Ding,

Shaoming Wang,

Rizen Guo,

Shuigeng Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mi_2025_CVPR,
  author    = {Mi, Yuxi and Zhong, Zhizhou and Huang, Yuge and Yuan, Qiuyang and Zhao, Xuan and Xu, Jianqing and Ding, Shouhong and Wang, Shaoming and Guo, Rizen and Zhou, Shuigeng},
  title     = {Data Synthesis with Diverse Styles for Face Recognition via {3DMM}-Guided Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21203--21214},
}
Geometric Knowledge-Guided Localized Global Distribution Alignment for Federated Learning: Yanbiao Ma,

Wei Dai,

Wenke Huang,

Jiayi Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Yanbiao and Dai, Wei and Huang, Wenke and Chen, Jiayi},
  title     = {Geometric Knowledge-Guided Localized Global Distribution Alignment for Federated Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {20958--20968},
}
GlyphMastero: A Glyph Encoder for High-Fidelity Scene Text Editing: Tong Wang,

Ting Liu,

Xiaochao Qu,

Chengjing Wu,

Luoqi Liu,

Xiaolin Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Tong and Liu, Ting and Qu, Xiaochao and Wu, Chengjing and Liu, Luoqi and Hu, Xiaolin},
  title     = {{GlyphMastero}: A Glyph Encoder for High-Fidelity Scene Text Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28523--28532},
}
Birth and Death of a Rose: Chen Geng,

Yunzhi Zhang,

Shangzhe Wu,

Jiajun Wu; [pdf] [arXiv]
[bibtex]
@InProceedings{Geng_2025_CVPR,
  author    = {Geng, Chen and Zhang, Yunzhi and Wu, Shangzhe and Wu, Jiajun},
  title     = {Birth and Death of a Rose},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26102--26113},
}
MetricGrids: Arbitrary Nonlinear Approximation with Elementary Metric Grids based Implicit Neural Representation: Shu Wang,

Yanbo Gao,

Shuai Li,

Chong Lv,

Xun Cai,

Chuankun Li,

Hui Yuan,

Jinglin Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shu and Gao, Yanbo and Li, Shuai and Lv, Chong and Cai, Xun and Li, Chuankun and Yuan, Hui and Zhang, Jinglin},
  title     = {{MetricGrids}: Arbitrary Nonlinear Approximation with Elementary Metric Grids based Implicit Neural Representation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21381--21391},
}
MovieBench: A Hierarchical Movie Level Dataset for Long Video Generation: Weijia Wu,

Mingyu Liu,

Zeyu Zhu,

Xi Xia,

Haoen Feng,

Wen Wang,

Kevin Qinghong Lin,

Chunhua Shen,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Weijia and Liu, Mingyu and Zhu, Zeyu and Xia, Xi and Feng, Haoen and Wang, Wen and Lin, Kevin Qinghong and Shen, Chunhua and Shou, Mike Zheng},
  title     = {{MovieBench}: A Hierarchical Movie Level Dataset for Long Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28984--28994},
}
Be More Specific: Evaluating Object-centric Realism in Synthetic Images: Anqi Liang,

Ciprian Corneanu,

Qianli Feng,

Giorgio Giannone,

Aleix Martinez; [pdf] [supp]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Anqi and Corneanu, Ciprian and Feng, Qianli and Giannone, Giorgio and Martinez, Aleix},
  title     = {Be More Specific: Evaluating Object-centric Realism in Synthetic Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28842--28851},
}
Correlative and Discriminative Label Grouping for Multi-Label Visual Prompt Tuning: Lei-Lei Ma,

Shuo Xu,

Ming-Kun Xie,

Lei Wang,

Dengdi Sun,

Haifeng Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Lei-Lei and Xu, Shuo and Xie, Ming-Kun and Wang, Lei and Sun, Dengdi and Zhao, Haifeng},
  title     = {Correlative and Discriminative Label Grouping for Multi-Label Visual Prompt Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25434--25443},
}
SFDM: Robust Decomposition of Geometry and Reflectance for Realistic Face Rendering from Sparse-view Images: Daisheng Jin,

Jiangbei Hu,

Baixin Xu,

Yuxin Dai,

Chen Qian,

Ying He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2025_CVPR,
  author    = {Jin, Daisheng and Hu, Jiangbei and Xu, Baixin and Dai, Yuxin and Qian, Chen and He, Ying},
  title     = {{SFDM}: Robust Decomposition of Geometry and Reflectance for Realistic Face Rendering from Sparse-view Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26409--26419},
}
Ouroboros3D: Image-to-3D Generation via 3D-aware Recursive Diffusion: Hao Wen,

Zehuan Huang,

Yaohui Wang,

Xinyuan Chen,

Lu Sheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2025_CVPR,
  author    = {Wen, Hao and Huang, Zehuan and Wang, Yaohui and Chen, Xinyuan and Sheng, Lu},
  title     = {{Ouroboros3D}: Image-to-{3D} Generation via {3D}-aware Recursive Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21631--21641},
}
QMambaBSR: Burst Image Super-Resolution with Query State Space Model: Xin Di,

Long Peng,

Peizhe Xia,

Wenbo Li,

Renjing Pei,

Yang Cao,

Yang Wang,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Di_2025_CVPR,
  author    = {Di, Xin and Peng, Long and Xia, Peizhe and Li, Wenbo and Pei, Renjing and Cao, Yang and Wang, Yang and Zha, Zheng-Jun},
  title     = {{QMambaBSR}: Burst Image Super-Resolution with Query State Space Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23080--23090},
}
Multi-Group Proportional Representations for Text-to-Image Models: Sangwon Jung,

Alex Oesterling,

Claudio Mayrink Verdun,

Sajani Vithana,

Taesup Moon,

Flavio P. Calmon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2025_CVPR,
  author    = {Jung, Sangwon and Oesterling, Alex and Verdun, Claudio Mayrink and Vithana, Sajani and Moon, Taesup and Calmon, Flavio P.},
  title     = {Multi-Group Proportional Representations for Text-to-Image Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23744--23754},
}
Towards Generalizable Trajectory Prediction using Dual-Level Representation Learning and Adaptive Prompting: Kaouther Messaoud,

Matthieu Cord,

Alexandre Alahi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Messaoud_2025_CVPR,
  author    = {Messaoud, Kaouther and Cord, Matthieu and Alahi, Alexandre},
  title     = {Towards Generalizable Trajectory Prediction using Dual-Level Representation Learning and Adaptive Prompting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27564--27574},
}
CoMatcher: Multi-View Collaborative Feature Matching: Jintao Zhang,

Zimin Xia,

Mingyue Dong,

Shuhan Shen,

Linwei Yue,

Xianwei Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jintao and Xia, Zimin and Dong, Mingyue and Shen, Shuhan and Yue, Linwei and Zheng, Xianwei},
  title     = {{CoMatcher}: Multi-View Collaborative Feature Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21970--21980},
}
Towards a Universal Synthetic Video Detector: From Face or Background Manipulations to Fully AI-Generated Content: Rohit Kundu,

Hao Xiong,

Vishal Mohanty,

Athula Balachandran,

Amit K. Roy-Chowdhury; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kundu_2025_CVPR,
  author    = {Kundu, Rohit and Xiong, Hao and Mohanty, Vishal and Balachandran, Athula and Roy-Chowdhury, Amit K.},
  title     = {Towards a Universal Synthetic Video Detector: From Face or Background Manipulations to Fully {AI}-Generated Content},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28050--28060},
}
A Focused Human Body Model for Accurate Anthropometric Measurements Extraction: Shuhang Chen,

Xianliang Huang,

Zhizhou Zhong,

Juhong Guan,

Shuigeng Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Shuhang and Huang, Xianliang and Zhong, Zhizhou and Guan, Juhong and Zhou, Shuigeng},
  title     = {A Focused Human Body Model for Accurate Anthropometric Measurements Extraction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22658--22667},
}
ACE: Anti-Editing Concept Erasure in Text-to-Image Models: Zihao Wang,

Yuxiang Wei,

Fan Li,

Renjing Pei,

Hang Xu,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Zihao and Wei, Yuxiang and Li, Fan and Pei, Renjing and Xu, Hang and Zuo, Wangmeng}, title = {ACE: Anti-Editing Concept Erasure in Text-to-Image Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23505-23515} }
Synthetic-to-Real Self-supervised Robust Depth Estimation via Learning with Motion and Structure Priors: Weilong Yan,

Ming Li,

Haipeng Li,

Shuwei Shao,

Robby T. Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR, author = {Yan, Weilong and Li, Ming and Li, Haipeng and Shao, Shuwei and Tan, Robby T.}, title = {Synthetic-to-Real Self-supervised Robust Depth Estimation via Learning with Motion and Structure Priors}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21880-21890} }
Hierarchical Knowledge Prompt Tuning for Multi-task Test-Time Adaptation: Qiang Zhang,

Mengsheng Zhao,

Jiawei Liu,

Fanrui Zhang,

Yongchao Xu,

Zheng-Jun Zha; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Qiang and Zhao, Mengsheng and Liu, Jiawei and Zhang, Fanrui and Xu, Yongchao and Zha, Zheng-Jun}, title = {Hierarchical Knowledge Prompt Tuning for Multi-task Test-Time Adaptation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30524-30533} }
LaTexBlend: Scaling Multi-concept Customized Generation with Latent Textual Blending: Jian Jin,

Zhenbo Yu,

Yang Shen,

Zhenyong Fu,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2025_CVPR, author = {Jin, Jian and Yu, Zhenbo and Shen, Yang and Fu, Zhenyong and Yang, Jian}, title = {LaTexBlend: Scaling Multi-concept Customized Generation with Latent Textual Blending}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23585-23594} }
DejaVid: Encoder-Agnostic Learned Temporal Matching for Video Classification: Darryl Ho,

Samuel Madden; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ho_2025_CVPR, author = {Ho, Darryl and Madden, Samuel}, title = {DejaVid: Encoder-Agnostic Learned Temporal Matching for Video Classification}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24023-24032} }
Let's Verify and Reinforce Image Generation Step by Step: Renrui Zhang,

Chengzhuo Tong,

Zhizheng Zhao,

Ziyu Guo,

Haoquan Zhang,

Manyuan Zhang,

Jiaming Liu,

Peng Gao,

Hongsheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Renrui and Tong, Chengzhuo and Zhao, Zhizheng and Guo, Ziyu and Zhang, Haoquan and Zhang, Manyuan and Liu, Jiaming and Gao, Peng and Li, Hongsheng}, title = {Let's Verify and Reinforce Image Generation Step by Step}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28662-28672} }
All-Optical Nonlinear Diffractive Deep Network for Ultrafast Image Denoising: Xiaoling Zhou,

Zhemg Lee,

Wei Ye,

Rui Xie,

Wenbo Zhang,

Guanju Peng,

Zongze Li,

Shikun Zhang; [pdf]
[bibtex]
@InProceedings{Zhou_2025_CVPR, author = {Zhou, Xiaoling and Lee, Zhemg and Ye, Wei and Xie, Rui and Zhang, Wenbo and Peng, Guanju and Li, Zongze and Zhang, Shikun}, title = {All-Optical Nonlinear Diffractive Deep Network for Ultrafast Image Denoising}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28221-28231} }
UNOPose: Unseen Object Pose Estimation with an Unposed RGB-D Reference Image: Xingyu Liu,

Gu Wang,

Ruida Zhang,

Chenyangguang Zhang,

Federico Tombari,

Xiangyang Ji; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR, author = {Liu, Xingyu and Wang, Gu and Zhang, Ruida and Zhang, Chenyangguang and Tombari, Federico and Ji, Xiangyang}, title = {UNOPose: Unseen Object Pose Estimation with an Unposed RGB-D Reference Image}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22023-22034} }
HybridMQA: Exploring Geometry-Texture Interactions for Colored Mesh Quality Assessment: Armin Shafiee Sarvestani,

Sheyang Tang,

Zhou Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sarvestani_2025_CVPR, author = {Sarvestani, Armin Shafiee and Tang, Sheyang and Wang, Zhou}, title = {HybridMQA: Exploring Geometry-Texture Interactions for Colored Mesh Quality Assessment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21414-21424} }
SIR-DIFF: Sparse Image Sets Restoration with Multi-View Diffusion Model: Yucheng Mao,

Boyang Wang,

Nilesh Kulkarni,

Jeong Joon Park; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2025_CVPR, author = {Mao, Yucheng and Wang, Boyang and Kulkarni, Nilesh and Park, Jeong Joon}, title = {SIR-DIFF: Sparse Image Sets Restoration with Multi-View Diffusion Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21620-21630} }
Reversible Decoupling Network for Single Image Reflection Removal: Hao Zhao,

Mingjia Li,

Qiming Hu,

Xiaojie Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR, author = {Zhao, Hao and Li, Mingjia and Hu, Qiming and Guo, Xiaojie}, title = {Reversible Decoupling Network for Single Image Reflection Removal}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26430-26439} }
Hierarchical Features Matter: A Deep Exploration of Progressive Parameterization Method for Dataset Distillation: Xinhao Zhong,

Hao Fang,

Bin Chen,

Xulin Gu,

Meikang Qiu,

Shuhan Qi,

Shu-Tao Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_CVPR, author = {Zhong, Xinhao and Fang, Hao and Chen, Bin and Gu, Xulin and Qiu, Meikang and Qi, Shuhan and Xia, Shu-Tao}, title = {Hierarchical Features Matter: A Deep Exploration of Progressive Parameterization Method for Dataset Distillation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30462-30471} }
GLASS: Guided Latent Slot Diffusion for Object-Centric Learning: Krishnakant Singh,

Simone Schaub-Meyer,

Stefan Roth; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singh_2025_CVPR, author = {Singh, Krishnakant and Schaub-Meyer, Simone and Roth, Stefan}, title = {GLASS: Guided Latent Slot Diffusion for Object-Centric Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28673-28683} }
SASep: Saliency-Aware Structured Separation of Geometry and Feature for Open Set Learning on Point Clouds: Jinfeng Xu,

Xianzhi Li,

Yuan Tang,

Xu Han,

Qiao Yu,

Yixue Hao,

Long Hu,

Min Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR, author = {Xu, Jinfeng and Li, Xianzhi and Tang, Yuan and Han, Xu and Yu, Qiao and Hao, Yixue and Hu, Long and Chen, Min}, title = {SASep: Saliency-Aware Structured Separation of Geometry and Feature for Open Set Learning on Point Clouds}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27295-27304} }
Low-Biased General Annotated Dataset Generation: Dengyang Jiang,

Haoyu Wang,

Lei Zhang,

Wei Wei,

Guang Dai,

Mengmeng Wang,

Jingdong Wang,

Yanning Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR, author = {Jiang, Dengyang and Wang, Haoyu and Zhang, Lei and Wei, Wei and Dai, Guang and Wang, Mengmeng and Wang, Jingdong and Zhang, Yanning}, title = {Low-Biased General Annotated Dataset Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25113-25123} }
Generative Hard Example Augmentation for Semantic Point Cloud Segmentation: Qi Zhang,

Jibin Peng,

Zhao Huang,

Wei Feng,

Di Lin; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Qi and Peng, Jibin and Huang, Zhao and Feng, Wei and Lin, Di}, title = {Generative Hard Example Augmentation for Semantic Point Cloud Segmentation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22205-22214} }
ETAP: Event-based Tracking of Any Point: Friedhelm Hamann,

Daniel Gehrig,

Filbert Febryanto,

Kostas Daniilidis,

Guillermo Gallego; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hamann_2025_CVPR, author = {Hamann, Friedhelm and Gehrig, Daniel and Febryanto, Filbert and Daniilidis, Kostas and Gallego, Guillermo}, title = {ETAP: Event-based Tracking of Any Point}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27186-27196} }
Beyond Sight: Towards Cognitive Alignment in LVLM via Enriched Visual Knowledge: Yaqi Zhao,

Yuanyang Yin,

Lin Li,

Mingan Lin,

Victor Shea-Jay Huang,

Siwei Chen,

Weipeng Chen,

Baoqun Yin,

Zenan Zhou,

Wentao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR, author = {Zhao, Yaqi and Yin, Yuanyang and Li, Lin and Lin, Mingan and Huang, Victor Shea-Jay and Chen, Siwei and Chen, Weipeng and Yin, Baoqun and Zhou, Zenan and Zhang, Wentao}, title = {Beyond Sight: Towards Cognitive Alignment in LVLM via Enriched Visual Knowledge}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24950-24959} }
Volumetric Surfaces: Representing Fuzzy Geometries with Layered Meshes: Stefano Esposito,

Anpei Chen,

Christian Reiser,

Samuel Rota Bulò,

Lorenzo Porzi,

Katja Schwarz,

Christian Richardt,

Michael Zollhöfer,

Peter Kontschieder,

Andreas Geiger; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Esposito_2025_CVPR, author = {Esposito, Stefano and Chen, Anpei and Reiser, Christian and Bul\`o, Samuel Rota and Porzi, Lorenzo and Schwarz, Katja and Richardt, Christian and Zollh\"ofer, Michael and Kontschieder, Peter and Geiger, Andreas}, title = {Volumetric Surfaces: Representing Fuzzy Geometries with Layered Meshes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21370-21380} }
STEPS: Sequential Probability Tensor Estimation for Text-to-Image Hard Prompt Search: Yuning Qiu,

Andong Wang,

Chao Li,

Haonan Huang,

Guoxu Zhou,

Qibin Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2025_CVPR, author = {Qiu, Yuning and Wang, Andong and Li, Chao and Huang, Haonan and Zhou, Guoxu and Zhao, Qibin}, title = {STEPS: Sequential Probability Tensor Estimation for Text-to-Image Hard Prompt Search}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28640-28650} }
VIRES: Video Instance Repainting via Sketch and Text Guided Generation: Shuchen Weng,

Haojie Zheng,

Peixuan Zhang,

Yuchen Hong,

Han Jiang,

Si Li,

Boxin Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Weng_2025_CVPR, author = {Weng, Shuchen and Zheng, Haojie and Zhang, Peixuan and Hong, Yuchen and Jiang, Han and Li, Si and Shi, Boxin}, title = {VIRES: Video Instance Repainting via Sketch and Text Guided Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28416-28425} }
MIMO: Controllable Character Video Synthesis with Spatial Decomposed Modeling: Yifang Men,

Yuan Yao,

Miaomiao Cui,

Liefeng Bo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Men_2025_CVPR, author = {Men, Yifang and Yao, Yuan and Cui, Miaomiao and Bo, Liefeng}, title = {MIMO: Controllable Character Video Synthesis with Spatial Decomposed Modeling}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21181-21191} }
From Sparse to Dense: Camera Relocalization with Scene-Specific Detector from Feature Gaussian Splatting: Zhiwei Huang,

Hailin Yu,

Yichun Shentu,

Jin Yuan,

Guofeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR, author = {Huang, Zhiwei and Yu, Hailin and Shentu, Yichun and Yuan, Jin and Zhang, Guofeng}, title = {From Sparse to Dense: Camera Relocalization with Scene-Specific Detector from Feature Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27059-27069} }
StableAnimator: High-Quality Identity-Preserving Human Image Animation: Shuyuan Tu,

Zhen Xing,

Xintong Han,

Zhi-Qi Cheng,

Qi Dai,

Chong Luo,

Zuxuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tu_2025_CVPR, author = {Tu, Shuyuan and Xing, Zhen and Han, Xintong and Cheng, Zhi-Qi and Dai, Qi and Luo, Chong and Wu, Zuxuan}, title = {StableAnimator: High-Quality Identity-Preserving Human Image Animation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21096-21106} }
OODD: Test-time Out-of-Distribution Detection with Dynamic Dictionary: Yifeng Yang,

Lin Zhu,

Zewen Sun,

Hengyu Liu,

Qinying Gu,

Nanyang Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR, author = {Yang, Yifeng and Zhu, Lin and Sun, Zewen and Liu, Hengyu and Gu, Qinying and Ye, Nanyang}, title = {OODD: Test-time Out-of-Distribution Detection with Dynamic Dictionary}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30630-30639} }
BIMBA: Selective-Scan Compression for Long-Range Video Question Answering: Md Mohaiminul Islam,

Tushar Nagarajan,

Huiyu Wang,

Gedas Bertasius,

Lorenzo Torresani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Islam_2025_CVPR, author = {Islam, Md Mohaiminul and Nagarajan, Tushar and Wang, Huiyu and Bertasius, Gedas and Torresani, Lorenzo}, title = {BIMBA: Selective-Scan Compression for Long-Range Video Question Answering}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29096-29107} }
Diff2Flow: Training Flow Matching Models via Diffusion Model Alignment: Johannes Schusterbauer,

Ming Gui,

Frank Fundel,

Björn Ommer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schusterbauer_2025_CVPR, author = {Schusterbauer, Johannes and Gui, Ming and Fundel, Frank and Ommer, Bj\"orn}, title = {Diff2Flow: Training Flow Matching Models via Diffusion Model Alignment}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28347-28357} }
Prof. Robot: Differentiable Robot Rendering Without Static and Self-Collisions: Quanyuan Ruan,

Jiabao Lei,

Wenhao Yuan,

Yanglin Zhang,

Dekun Lu,

Guiliang Liu,

Kui Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ruan_2025_CVPR, author = {Ruan, Quanyuan and Lei, Jiabao and Yuan, Wenhao and Zhang, Yanglin and Lu, Dekun and Liu, Guiliang and Jia, Kui}, title = {Prof. Robot: Differentiable Robot Rendering Without Static and Self-Collisions}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22562-22572} }
DropGaussian: Structural Regularization for Sparse-view Gaussian Splatting: Hyunwoo Park,

Gun Ryu,

Wonjun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR, author = {Park, Hyunwoo and Ryu, Gun and Kim, Wonjun}, title = {DropGaussian: Structural Regularization for Sparse-view Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21600-21609} }
Blurred LiDAR for Sharper 3D: Robust Handheld 3D Scanning with Diffuse LiDAR and RGB: Nikhil Behari,

Aaron Young,

Siddharth Somasundaram,

Tzofi Klinghoffer,

Akshat Dave,

Ramesh Raskar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Behari_2025_CVPR, author = {Behari, Nikhil and Young, Aaron and Somasundaram, Siddharth and Klinghoffer, Tzofi and Dave, Akshat and Raskar, Ramesh}, title = {Blurred LiDAR for Sharper 3D: Robust Handheld 3D Scanning with Diffuse LiDAR and RGB}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26954-26964} }
Novel View Synthesis with Pixel-Space Diffusion Models: Noam Elata,

Bahjat Kawar,

Yaron Ostrovsky-Berman,

Miriam Farber,

Ron Sokolovsky; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Elata_2025_CVPR, author = {Elata, Noam and Kawar, Bahjat and Ostrovsky-Berman, Yaron and Farber, Miriam and Sokolovsky, Ron}, title = {Novel View Synthesis with Pixel-Space Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26756-26766} }
Object Detection using Event Camera: A MoE Heat Conduction based Detector and A New Benchmark Dataset: Xiao Wang,

Yu Jin,

Wentao Wu,

Wei Zhang,

Lin Zhu,

Bo Jiang,

Yonghong Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR, author = {Wang, Xiao and Jin, Yu and Wu, Wentao and Zhang, Wei and Zhu, Lin and Jiang, Bo and Tian, Yonghong}, title = {Object Detection using Event Camera: A MoE Heat Conduction based Detector and A New Benchmark Dataset}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29321-29330} }
Document Haystacks: Vision-Language Reasoning Over Piles of 1000+ Documents: Jun Chen,

Dannong Xu,

Junjie Fei,

Chun-Mei Feng,

Mohamed Elhoseiny; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR, author = {Chen, Jun and Xu, Dannong and Fei, Junjie and Feng, Chun-Mei and Elhoseiny, Mohamed}, title = {Document Haystacks: Vision-Language Reasoning Over Piles of 1000+ Documents}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24817-24826} }
Rethinking Few-Shot Adaptation of Vision-Language Models in Two Stages: Matteo Farina,

Massimiliano Mancini,

Giovanni Iacca,

Elisa Ricci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Farina_2025_CVPR, author = {Farina, Matteo and Mancini, Massimiliano and Iacca, Giovanni and Ricci, Elisa}, title = {Rethinking Few-Shot Adaptation of Vision-Language Models in Two Stages}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {29989-29998} }
TAGA: Self-supervised Learning for Template-free Animatable Gaussian Articulated Model: Zhichao Zhai,

Guikun Chen,

Wenguan Wang,

Dong Zheng,

Jun Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Zhai_2025_CVPR, author = {Zhai, Zhichao and Chen, Guikun and Wang, Wenguan and Zheng, Dong and Xiao, Jun}, title = {TAGA: Self-supervised Learning for Template-free Animatable Gaussian Articulated Model}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21159-21169} }
Horizon-GS: Unified 3D Gaussian Splatting for Large-Scale Aerial-to-Ground Scenes: Lihan Jiang,

Kerui Ren,

Mulin Yu,

Linning Xu,

Junting Dong,

Tao Lu,

Feng Zhao,

Dahua Lin,

Bo Dai; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_CVPR, author = {Jiang, Lihan and Ren, Kerui and Yu, Mulin and Xu, Linning and Dong, Junting and Lu, Tao and Zhao, Feng and Lin, Dahua and Dai, Bo}, title = {Horizon-GS: Unified 3D Gaussian Splatting for Large-Scale Aerial-to-Ground Scenes}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26789-26799} }
LotusFilter: Fast Diverse Nearest Neighbor Search via a Learned Cutoff Table: Yusuke Matsui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Matsui_2025_CVPR, author = {Matsui, Yusuke}, title = {LotusFilter: Fast Diverse Nearest Neighbor Search via a Learned Cutoff Table}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {30430-30439} }
Think Small, Act Big: Primitive Prompt Learning for Lifelong Robot Manipulation: Yuanqi Yao,

Siao Liu,

Haoming Song,

Delin Qu,

Qizhi Chen,

Yan Ding,

Bin Zhao,

Zhigang Wang,

Xuelong Li,

Dong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2025_CVPR, author = {Yao, Yuanqi and Liu, Siao and Song, Haoming and Qu, Delin and Chen, Qizhi and Ding, Yan and Zhao, Bin and Wang, Zhigang and Li, Xuelong and Wang, Dong}, title = {Think Small, Act Big: Primitive Prompt Learning for Lifelong Robot Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {22573-22583} }
SoMA: Singular Value Decomposed Minor Components Adaptation for Domain Generalizable Representation Learning: Seokju Yun,

Seunghye Chae,

Dongheon Lee,

Youngmin Ro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yun_2025_CVPR, author = {Yun, Seokju and Chae, Seunghye and Lee, Dongheon and Ro, Youngmin}, title = {SoMA: Singular Value Decomposed Minor Components Adaptation for Domain Generalizable Representation Learning}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25602-25612} }
Ref-GS: Directional Factorization for 2D Gaussian Splatting: Youjia Zhang,

Anpei Chen,

Yumin Wan,

Zikai Song,

Junqing Yu,

Yawei Luo,

Wei Yang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR, author = {Zhang, Youjia and Chen, Anpei and Wan, Yumin and Song, Zikai and Yu, Junqing and Luo, Yawei and Yang, Wei}, title = {Ref-GS: Directional Factorization for 2D Gaussian Splatting}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26483-26492} }
VDocRAG: Retrieval-Augmented Generation over Visually-Rich Documents: Ryota Tanaka,

Taichi Iki,

Taku Hasegawa,

Kyosuke Nishida,

Kuniko Saito,

Jun Suzuki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tanaka_2025_CVPR, author = {Tanaka, Ryota and Iki, Taichi and Hasegawa, Taku and Nishida, Kyosuke and Saito, Kuniko and Suzuki, Jun}, title = {VDocRAG: Retrieval-Augmented Generation over Visually-Rich Documents}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24827-24837} }
Concept Lancet: Image Editing with Compositional Representation Transplant: Jinqi Luo,

Tianjiao Ding,

Kwan Ho Ryan Chan,

Hancheng Min,

Chris Callison-Burch,

Rene Vidal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_CVPR, author = {Luo, Jinqi and Ding, Tianjiao and Chan, Kwan Ho Ryan and Min, Hancheng and Callison-Burch, Chris and Vidal, Rene}, title = {Concept Lancet: Image Editing with Compositional Representation Transplant}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28502-28512} }
Generative Densification: Learning to Densify Gaussians for High-Fidelity Generalizable 3D Reconstruction: Seungtae Nam,

Xiangyu Sun,

Gyeongjin Kang,

Younggeun Lee,

Seungjun Oh,

Eunbyung Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2025_CVPR, author = {Nam, Seungtae and Sun, Xiangyu and Kang, Gyeongjin and Lee, Younggeun and Oh, Seungjun and Park, Eunbyung}, title = {Generative Densification: Learning to Densify Gaussians for High-Fidelity Generalizable 3D Reconstruction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26683-26693} }
Video-MME: The First-Ever Comprehensive Evaluation Benchmark of Multi-modal LLMs in Video Analysis: Chaoyou Fu,

Yuhan Dai,

Yongdong Luo,

Lei Li,

Shuhuai Ren,

Renrui Zhang,

Zihan Wang,

Chenyu Zhou,

Yunhang Shen,

Mengdan Zhang,

Peixian Chen,

Yanwei Li,

Shaohui Lin,

Sirui Zhao,

Ke Li,

Tong Xu,

Xiawu Zheng,

Enhong Chen,

Caifeng Shan,

Ran He,

Xing Sun; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2025_CVPR, author = {Fu, Chaoyou and Dai, Yuhan and Luo, Yongdong and Li, Lei and Ren, Shuhuai and Zhang, Renrui and Wang, Zihan and Zhou, Chenyu and Shen, Yunhang and Zhang, Mengdan and Chen, Peixian and Li, Yanwei and Lin, Shaohui and Zhao, Sirui and Li, Ke and Xu, Tong and Zheng, Xiawu and Chen, Enhong and Shan, Caifeng and He, Ran and Sun, Xing}, title = {Video-MME: The First-Ever Comprehensive Evaluation Benchmark of Multi-modal LLMs in Video Analysis}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24108-24118} }
Are Images Indistinguishable to Humans Also Indistinguishable to Classifiers?: Zebin You,

Xinyu Zhang,

Hanzhong Guo,

Jingdong Wang,

Chongxuan Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{You_2025_CVPR, author = {You, Zebin and Zhang, Xinyu and Guo, Hanzhong and Wang, Jingdong and Li, Chongxuan}, title = {Are Images Indistinguishable to Humans Also Indistinguishable to Classifiers?}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {28790-28800} }
Understanding Multi-layered Transmission Matrices: Anat Levin,

Marina Alterman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Levin_2025_CVPR, author = {Levin, Anat and Alterman, Marina}, title = {Understanding Multi-layered Transmission Matrices}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23164-23173} }
GS-DiT: Advancing Video Generation with Dynamic 3D Gaussian Fields through Efficient Dense 3D Point Tracking: Weikang Bian,

Zhaoyang Huang,

Xiaoyu Shi,

Yijin Li,

Fu-Yun Wang,

Hongsheng Li; [pdf] [supp]
[bibtex]
@InProceedings{Bian_2025_CVPR, author = {Bian, Weikang and Huang, Zhaoyang and Shi, Xiaoyu and Li, Yijin and Wang, Fu-Yun and Li, Hongsheng}, title = {GS-DiT: Advancing Video Generation with Dynamic 3D Gaussian Fields through Efficient Dense 3D Point Tracking}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21717-21727} }
AnyMoLe: Any Character Motion In-betweening Leveraging Video Diffusion Models: Kwan Yun,

Seokhyeon Hong,

Chaelin Kim,

Junyong Noh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yun_2025_CVPR, author = {Yun, Kwan and Hong, Seokhyeon and Kim, Chaelin and Noh, Junyong}, title = {AnyMoLe: Any Character Motion In-betweening Leveraging Video Diffusion Models}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27838-27848} }
Dual Energy-Based Model with Open-World Uncertainty Estimation for Out-of-distribution Detection: Qi Chen,

Hu Ding; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR, author = {Chen, Qi and Ding, Hu}, title = {Dual Energy-Based Model with Open-World Uncertainty Estimation for Out-of-distribution Detection}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25728-25737} }
DTGBrepGen: A Novel B-rep Generative Model through Decoupling Topology and Geometry: Jing Li,

Yihang Fu,

Falai Chen; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Jing and Fu, Yihang and Chen, Falai}, title = {DTGBrepGen: A Novel B-rep Generative Model through Decoupling Topology and Geometry}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {21438-21447} }
Schedule On the Fly: Diffusion Time Prediction for Faster and Better Image Generation: Zilyu Ye,

Zhiyang Chen,

Tiancheng Li,

Zemin Huang,

Weijian Luo,

Guo-Jun Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2025_CVPR, author = {Ye, Zilyu and Chen, Zhiyang and Li, Tiancheng and Huang, Zemin and Luo, Weijian and Qi, Guo-Jun}, title = {Schedule On the Fly: Diffusion Time Prediction for Faster and Better Image Generation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {23412-23422} }
Learning Audio-guided Video Representation with Gated Attention for Video-Text Retrieval: Boseung Jeong,

Jicheol Park,

Sungyeon Kim,

Suha Kwak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2025_CVPR, author = {Jeong, Boseung and Park, Jicheol and Kim, Sungyeon and Kwak, Suha}, title = {Learning Audio-guided Video Representation with Gated Attention for Video-Text Retrieval}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {26202-26211} }
TASTE-Rob: Advancing Video Generation of Task-Oriented Hand-Object Interaction for Generalizable Robotic Manipulation: Hongxiang Zhao,

Xingchen Liu,

Mutian Xu,

Yiming Hao,

Weikai Chen,

Xiaoguang Han; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR, author = {Zhao, Hongxiang and Liu, Xingchen and Xu, Mutian and Hao, Yiming and Chen, Weikai and Han, Xiaoguang}, title = {TASTE-Rob: Advancing Video Generation of Task-Oriented Hand-Object Interaction for Generalizable Robotic Manipulation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27683-27693} }
NoT: Federated Unlearning via Weight Negation: Yasser H. Khalil,

Leo Brunswic,

Soufiane Lamghari,

Xu Li,

Mahdi Beitollahi,

Xi Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Khalil_2025_CVPR, author = {Khalil, Yasser H. and Brunswic, Leo and Lamghari, Soufiane and Li, Xu and Beitollahi, Mahdi and Chen, Xi}, title = {NoT: Federated Unlearning via Weight Negation}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {25759-25769} }
RANGE: Retrieval Augmented Neural Fields for Multi-Resolution Geo-Embeddings: Aayush Dhakal,

Srikumar Sastry,

Subash Khanal,

Adeel Ahmad,

Eric Xing,

Nathan Jacobs; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dhakal_2025_CVPR, author = {Dhakal, Aayush and Sastry, Srikumar and Khanal, Subash and Ahmad, Adeel and Xing, Eric and Jacobs, Nathan}, title = {RANGE: Retrieval Augmented Neural Fields for Multi-Resolution Geo-Embeddings}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {24680-24689} }
SimMotionEdit: Text-Based Human Motion Editing with Motion Similarity Prediction: Zhengyuan Li,

Kai Cheng,

Anindita Ghosh,

Uttaran Bhattacharya,

Liangyan Gui,

Aniket Bera; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR, author = {Li, Zhengyuan and Cheng, Kai and Ghosh, Anindita and Bhattacharya, Uttaran and Gui, Liangyan and Bera, Aniket}, title = {SimMotionEdit: Text-Based Human Motion Editing with Motion Similarity Prediction}, booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)}, month = {June}, year = {2025}, pages = {27827-27837} }
From Head to Tail: Efficient Black-box Model Inversion Attack via Long-tailed Learning: Ziang Li,

Hongguang Zhang,

Juan Wang,

Meihui Chen,

Hongxin Hu,

Wenzhe Yi,

Xiaoyang Xu,

Mengda Yang,

Chenjun Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Ziang and Zhang, Hongguang and Wang, Juan and Chen, Meihui and Hu, Hongxin and Yi, Wenzhe and Xu, Xiaoyang and Yang, Mengda and Ma, Chenjun},
  title     = {From Head to Tail: Efficient Black-box Model Inversion Attack via Long-tailed Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29288--29298},
}
SIDA: Social Media Image Deepfake Detection, Localization and Explanation with Large Multimodal Model: Zhenglin Huang,

Jinwei Hu,

Xiangtai Li,

Yiwei He,

Xingyu Zhao,

Bei Peng,

Baoyuan Wu,

Xiaowei Huang,

Guangliang Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zhenglin and Hu, Jinwei and Li, Xiangtai and He, Yiwei and Zhao, Xingyu and Peng, Bei and Wu, Baoyuan and Huang, Xiaowei and Cheng, Guangliang},
  title     = {{SIDA}: Social Media Image Deepfake Detection, Localization and Explanation with Large Multimodal Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28831--28841},
}
Object-Centric Prompt-Driven Vision-Language-Action Model for Robotic Manipulation: Xiaoqi Li,

Jingyun Xu,

Mingxu Zhang,

Jiaming Liu,

Yan Shen,

Iaroslav Ponomarenko,

Jiahui Xu,

Liang Heng,

Siyuan Huang,

Shanghang Zhang,

Hao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Xiaoqi and Xu, Jingyun and Zhang, Mingxu and Liu, Jiaming and Shen, Yan and Ponomarenko, Iaroslav and Xu, Jiahui and Heng, Liang and Huang, Siyuan and Zhang, Shanghang and Dong, Hao},
  title     = {Object-Centric Prompt-Driven Vision-Language-Action Model for Robotic Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27638--27648},
}
Depth Any Camera: Zero-Shot Metric Depth Estimation from Any Camera: Yuliang Guo,

Sparsh Garg,

S. Mahdi H. Miangoleh,

Xinyu Huang,

Liu Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Yuliang and Garg, Sparsh and Miangoleh, S. Mahdi H. and Huang, Xinyu and Ren, Liu},
  title     = {Depth Any Camera: Zero-Shot Metric Depth Estimation from Any Camera},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26996--27006},
}
ShotAdapter: Text-to-Multi-Shot Video Generation with Diffusion Models: Ozgur Kara,

Krishna Kumar Singh,

Feng Liu,

Duygu Ceylan,

James M. Rehg,

Tobias Hinz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kara_2025_CVPR,
  author    = {Kara, Ozgur and Singh, Krishna Kumar and Liu, Feng and Ceylan, Duygu and Rehg, James M. and Hinz, Tobias},
  title     = {{ShotAdapter}: Text-to-Multi-Shot Video Generation with Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28405--28415},
}
Sound Bridge: Associating Egocentric and Exocentric Videos via Audio Cues: Sihong Huang,

Jiaxin Wu,

Xiaoyong Wei,

Yi Cai,

Dongmei Jiang,

Yaowei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Sihong and Wu, Jiaxin and Wei, Xiaoyong and Cai, Yi and Jiang, Dongmei and Wang, Yaowei},
  title     = {Sound Bridge: Associating Egocentric and Exocentric Videos via Audio Cues},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28942--28951},
}
OmniDocBench: Benchmarking Diverse PDF Document Parsing with Comprehensive Annotations: Linke Ouyang,

Yuan Qu,

Hongbin Zhou,

Jiawei Zhu,

Rui Zhang,

Qunshu Lin,

Bin Wang,

Zhiyuan Zhao,

Man Jiang,

Xiaomeng Zhao,

Jin Shi,

Fan Wu,

Pei Chu,

Minghao Liu,

Zhenxiang Li,

Chao Xu,

Bo Zhang,

Botian Shi,

Zhongying Tu,

Conghui He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ouyang_2025_CVPR,
  author    = {Ouyang, Linke and Qu, Yuan and Zhou, Hongbin and Zhu, Jiawei and Zhang, Rui and Lin, Qunshu and Wang, Bin and Zhao, Zhiyuan and Jiang, Man and Zhao, Xiaomeng and Shi, Jin and Wu, Fan and Chu, Pei and Liu, Minghao and Li, Zhenxiang and Xu, Chao and Zhang, Bo and Shi, Botian and Tu, Zhongying and He, Conghui},
  title     = {{OmniDocBench}: Benchmarking Diverse {PDF} Document Parsing with Comprehensive Annotations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24838--24848},
}
LayoutVLM: Differentiable Optimization of 3D Layout via Vision-Language Models: Fan-Yun Sun,

Weiyu Liu,

Siyi Gu,

Dylan Lim,

Goutam Bhat,

Federico Tombari,

Manling Li,

Nick Haber,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Fan-Yun and Liu, Weiyu and Gu, Siyi and Lim, Dylan and Bhat, Goutam and Tombari, Federico and Li, Manling and Haber, Nick and Wu, Jiajun},
  title     = {{LayoutVLM}: Differentiable Optimization of {3D} Layout via Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29469--29478},
}
Point Clouds Meets Physics: Dynamic Acoustic Field Fitting Network for Point Cloud Understanding: Changshuo Wang,

Shuting He,

Xiang Fang,

Jiawei Han,

Zhonghang Liu,

Xin Ning,

Weijun Li,

Prayag Tiwari; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Changshuo and He, Shuting and Fang, Xiang and Han, Jiawei and Liu, Zhonghang and Ning, Xin and Li, Weijun and Tiwari, Prayag},
  title     = {Point Clouds Meets Physics: Dynamic Acoustic Field Fitting Network for Point Cloud Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22182--22192},
}
Faster Parameter-Efficient Tuning with Token Redundancy Reduction: Kwonyoung Kim,

Jungin Park,

Jin Kim,

Hyeongjun Kwon,

Kwanghoon Sohn; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Kwonyoung and Park, Jungin and Kim, Jin and Kwon, Hyeongjun and Sohn, Kwanghoon},
  title     = {Faster Parameter-Efficient Tuning with Token Redundancy Reduction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30189--30198},
}
Panorama Generation From NFoV Image Done Right: Dian Zheng,

Cheng Zhang,

Xiao-Ming Wu,

Cao Li,

Chengfei Lv,

Jian-Fang Hu,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Dian and Zhang, Cheng and Wu, Xiao-Ming and Li, Cao and Lv, Chengfei and Hu, Jian-Fang and Zheng, Wei-Shi},
  title     = {Panorama Generation From {NFoV} Image Done Right},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21610--21619},
}
Sparse Point Cloud Patches Rendering via Splitting 2D Gaussians: Changfeng Ma,

Ran Bi,

Jie Guo,

Chongjun Wang,

Yanwen Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Changfeng and Bi, Ran and Guo, Jie and Wang, Chongjun and Guo, Yanwen},
  title     = {Sparse Point Cloud Patches Rendering via Splitting {2D} {Gaussians}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27285--27294},
}
Distilling Monocular Foundation Model for Fine-grained Depth Completion: Yingping Liang,

Yutao Hu,

Wenqi Shao,

Ying Fu; [pdf] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Yingping and Hu, Yutao and Shao, Wenqi and Fu, Ying},
  title     = {Distilling Monocular Foundation Model for Fine-grained Depth Completion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22254--22265},
}
AniGrad: Anisotropic Gradient-Adaptive Sampling for 3D Reconstruction From Monocular Video: Noah Stier,

Alex Rich,

Pradeep Sen,

Tobias Höllerer; [pdf] [supp]
[bibtex]
@InProceedings{Stier_2025_CVPR,
  author    = {Stier, Noah and Rich, Alex and Sen, Pradeep and H{\"o}llerer, Tobias},
  title     = {{AniGrad}: Anisotropic Gradient-Adaptive Sampling for {3D} Reconstruction From Monocular Video},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21814--21823},
}
Less Attention is More: Prompt Transformer for Generalized Category Discovery: Wei Zhang,

Baopeng Zhang,

Zhu Teng,

Wenxin Luo,

Junnan Zou,

Jianping Fan; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Wei and Zhang, Baopeng and Teng, Zhu and Luo, Wenxin and Zou, Junnan and Fan, Jianping},
  title     = {Less Attention is More: Prompt Transformer for Generalized Category Discovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30322--30331},
}
AToM: Aligning Text-to-Motion Model at Event-Level with GPT-4Vision Reward: Haonan Han,

Xiangzuo Wu,

Huan Liao,

Zunnan Xu,

Zhongyuan Hu,

Ronghui Li,

Yachao Zhang,

Xiu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Haonan and Wu, Xiangzuo and Liao, Huan and Xu, Zunnan and Hu, Zhongyuan and Li, Ronghui and Zhang, Yachao and Li, Xiu},
  title     = {{AToM}: Aligning Text-to-Motion Model at Event-Level with {GPT-4Vision} Reward},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22746--22755},
}
DoF-Gaussian: Controllable Depth-of-Field for 3D Gaussian Splatting: Liao Shen,

Tianqi Liu,

Huiqiang Sun,

Jiaqi Li,

Zhiguo Cao,

Wei Li,

Chen Change Loy; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2025_CVPR,
  author    = {Shen, Liao and Liu, Tianqi and Sun, Huiqiang and Li, Jiaqi and Cao, Zhiguo and Li, Wei and Loy, Chen Change},
  title     = {{DoF-Gaussian}: Controllable Depth-of-Field for {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26462--26471},
}
Reconciling Stochastic and Deterministic Strategies for Zero-shot Image Restoration using Diffusion Model in Dual: Chong Wang,

Lanqing Guo,

Zixuan Fu,

Siyuan Yang,

Hao Cheng,

Alex C. Kot,

Bihan Wen; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Chong and Guo, Lanqing and Fu, Zixuan and Yang, Siyuan and Cheng, Hao and Kot, Alex C. and Wen, Bihan},
  title     = {Reconciling Stochastic and Deterministic Strategies for Zero-shot Image Restoration using Diffusion Model in Dual},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23207--23216},
}
Hierarchical Flow Diffusion for Efficient Frame Interpolation: Yang Hai,

Guo Wang,

Tan Su,

Wenjie Jiang,

Yinlin Hu; [pdf] [arXiv]
[bibtex]
@InProceedings{Hai_2025_CVPR,
  author    = {Hai, Yang and Wang, Guo and Su, Tan and Jiang, Wenjie and Hu, Yinlin},
  title     = {Hierarchical Flow Diffusion for Efficient Frame Interpolation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22943--22952},
}
BASKET: A Large-Scale Video Dataset for Fine-Grained Skill Estimation: Yulu Pan,

Ce Zhang,

Gedas Bertasius; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2025_CVPR,
  author    = {Pan, Yulu and Zhang, Ce and Bertasius, Gedas},
  title     = {{BASKET}: A Large-Scale Video Dataset for Fine-Grained Skill Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28952--28962},
}
Arbitrary-steps Image Super-resolution via Diffusion Inversion: Zongsheng Yue,

Kang Liao,

Chen Change Loy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yue_2025_CVPR,
  author    = {Yue, Zongsheng and Liao, Kang and Loy, Chen Change},
  title     = {Arbitrary-steps Image Super-resolution via Diffusion Inversion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23153--23163},
}
Dynamic Neural Surfaces for Elastic 4D Shape Representation and Analysis: Awais Nizamani,

Hamid Laga,

Guanjin Wang,

Farid Boussaid,

Mohammed Bennamoun,

Anuj Srivastava; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nizamani_2025_CVPR,
  author    = {Nizamani, Awais and Laga, Hamid and Wang, Guanjin and Boussaid, Farid and Bennamoun, Mohammed and Srivastava, Anuj},
  title     = {Dynamic Neural Surfaces for Elastic {4D} Shape Representation and Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21783--21792},
}
ComfyBench: Benchmarking LLM-based Agents in ComfyUI for Autonomously Designing Collaborative AI Systems: Xiangyuan Xue,

Zeyu Lu,

Di Huang,

Zidong Wang,

Wanli Ouyang,

Lei Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2025_CVPR,
  author    = {Xue, Xiangyuan and Lu, Zeyu and Huang, Di and Wang, Zidong and Ouyang, Wanli and Bai, Lei},
  title     = {{ComfyBench}: Benchmarking {LLM}-based Agents in {ComfyUI} for Autonomously Designing Collaborative {AI} Systems},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24614--24624},
}
Incomplete Multi-View Multi-label Learning via Disentangled Representation and Label Semantic Embedding: Xu Yan,

Jun Yin,

Jie Wen; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Xu and Yin, Jun and Wen, Jie},
  title     = {Incomplete Multi-View Multi-label Learning via Disentangled Representation and Label Semantic Embedding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30722--30731},
}
AutoURDF: Unsupervised Robot Modeling from Point Cloud Frames Using Cluster Registration: Jiong Lin,

Lechen Zhang,

Kwansoo Lee,

Jialong Ning,

Judah Goldfeder,

Hod Lipson; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Jiong and Zhang, Lechen and Lee, Kwansoo and Ning, Jialong and Goldfeder, Judah and Lipson, Hod},
  title     = {{AutoURDF}: Unsupervised Robot Modeling from Point Cloud Frames Using Cluster Registration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27628--27637},
}
Golden Cudgel Network for Real-Time Semantic Segmentation: Guoyu Yang,

Yuan Wang,

Daming Shi,

Yanzhong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Guoyu and Wang, Yuan and Shi, Daming and Wang, Yanzhong},
  title     = {Golden Cudgel Network for Real-Time Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25367--25376},
}
Multi-modal Contrastive Learning with Negative Sampling Calibration for Phenotypic Drug Discovery: Jiahua Rao,

Hanjing Lin,

Leyu Chen,

Jiancong Xie,

Shuangjia Zheng,

Yuedong Yang; [pdf] [supp]
[bibtex]
@InProceedings{Rao_2025_CVPR,
  author    = {Rao, Jiahua and Lin, Hanjing and Chen, Leyu and Xie, Jiancong and Zheng, Shuangjia and Yang, Yuedong},
  title     = {Multi-modal Contrastive Learning with Negative Sampling Calibration for Phenotypic Drug Discovery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30752--30762},
}
R-TPT: Improving Adversarial Robustness of Vision-Language Models through Test-Time Prompt Tuning: Lijun Sheng,

Jian Liang,

Zilei Wang,

Ran He; [pdf] [supp]
[bibtex]
@InProceedings{Sheng_2025_CVPR,
  author    = {Sheng, Lijun and Liang, Jian and Wang, Zilei and He, Ran},
  title     = {{R-TPT}: Improving Adversarial Robustness of Vision-Language Models through Test-Time Prompt Tuning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29958--29967},
}
SplatFlow: Multi-View Rectified Flow Model for 3D Gaussian Splatting Synthesis: Hyojun Go,

Byeongjun Park,

Jiho Jang,

Jin-Young Kim,

Soonwoo Kwon,

Changick Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Go_2025_CVPR,
  author    = {Go, Hyojun and Park, Byeongjun and Jang, Jiho and Kim, Jin-Young and Kwon, Soonwoo and Kim, Changick},
  title     = {{SplatFlow}: Multi-View Rectified Flow Model for {3D} {Gaussian} Splatting Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21524--21536},
}
Boltzmann Attention Sampling for Image Analysis with Small Objects: Theodore Zhao,

Sid Kiblawi,

Naoto Usuyama,

Ho Hin Lee,

Sam Preston,

Hoifung Poon,

Mu Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Theodore and Kiblawi, Sid and Usuyama, Naoto and Lee, Ho Hin and Preston, Sam and Poon, Hoifung and Wei, Mu},
  title     = {Boltzmann Attention Sampling for Image Analysis with Small Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25950--25959},
}
Generalized Recorrupted-to-Recorrupted: Self-Supervised Learning Beyond Gaussian Noise: Brayan Monroy,

Jorge Bacca,

Julián Tachella; [pdf] [supp]
[bibtex]
@InProceedings{Monroy_2025_CVPR,
  author    = {Monroy, Brayan and Bacca, Jorge and Tachella, Juli{\'a}n},
  title     = {Generalized Recorrupted-to-Recorrupted: Self-Supervised Learning Beyond {Gaussian} Noise},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28155--28164},
}
Dynamic Motion Blending for Versatile Motion Editing: Nan Jiang,

Hongjie Li,

Ziye Yuan,

Zimo He,

Yixin Chen,

Tengyu Liu,

Yixin Zhu,

Siyuan Huang; [pdf] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Nan and Li, Hongjie and Yuan, Ziye and He, Zimo and Chen, Yixin and Liu, Tengyu and Zhu, Yixin and Huang, Siyuan},
  title     = {Dynamic Motion Blending for Versatile Motion Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22735--22745},
}
StdGEN: Semantic-Decomposed 3D Character Generation from Single Images: Yuze He,

Yanning Zhou,

Wang Zhao,

Zhongkai Wu,

Kaiwen Xiao,

Wei Yang,

Yong-Jin Liu,

Xiao Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Yuze and Zhou, Yanning and Zhao, Wang and Wu, Zhongkai and Xiao, Kaiwen and Yang, Wei and Liu, Yong-Jin and Han, Xiao},
  title     = {{StdGEN}: Semantic-Decomposed {3D} Character Generation from Single Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26345--26355},
}
Spatiotemporal Decoupling for Efficient Vision-Based Occupancy Forecasting: Jingyi Xu,

Xieyuanli Chen,

Junyi Ma,

Jiawei Huang,

Jintao Xu,

Yue Wang,

Ling Pei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Jingyi and Chen, Xieyuanli and Ma, Junyi and Huang, Jiawei and Xu, Jintao and Wang, Yue and Pei, Ling},
  title     = {Spatiotemporal Decoupling for Efficient Vision-Based Occupancy Forecasting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22338--22347},
}
FFR: Frequency Feature Rectification for Weakly Supervised Semantic Segmentation: Ziqian Yang,

Xinqiao Zhao,

Xiaolei Wang,

Quan Zhang,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Ziqian and Zhao, Xinqiao and Wang, Xiaolei and Zhang, Quan and Xiao, Jimin},
  title     = {{FFR}: Frequency Feature Rectification for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30261--30270},
}
Video-XL: Extra-Long Vision Language Model for Hour-Scale Video Understanding: Yan Shu,

Zheng Liu,

Peitian Zhang,

Minghao Qin,

Junjie Zhou,

Zhengyang Liang,

Tiejun Huang,

Bo Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Shu_2025_CVPR,
  author    = {Shu, Yan and Liu, Zheng and Zhang, Peitian and Qin, Minghao and Zhou, Junjie and Liang, Zhengyang and Huang, Tiejun and Zhao, Bo},
  title     = {{Video-XL}: Extra-Long Vision Language Model for Hour-Scale Video Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26160--26169},
}
Sonata: Self-Supervised Learning of Reliable Point Representations: Xiaoyang Wu,

Daniel DeTone,

Duncan Frost,

Tianwei Shen,

Chris Xie,

Nan Yang,

Jakob Engel,

Richard Newcombe,

Hengshuang Zhao,

Julian Straub; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Xiaoyang and DeTone, Daniel and Frost, Duncan and Shen, Tianwei and Xie, Chris and Yang, Nan and Engel, Jakob and Newcombe, Richard and Zhao, Hengshuang and Straub, Julian},
  title     = {Sonata: Self-Supervised Learning of Reliable Point Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22193--22204},
}
DriveGEN: Generalized and Robust 3D Detection in Driving via Controllable Text-to-Image Diffusion Generation: Hongbin Lin,

Zilu Guo,

Yifan Zhang,

Shuaicheng Niu,

Yafeng Li,

Ruimao Zhang,

Shuguang Cui,

Zhen Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Hongbin and Guo, Zilu and Zhang, Yifan and Niu, Shuaicheng and Li, Yafeng and Zhang, Ruimao and Cui, Shuguang and Li, Zhen},
  title     = {{DriveGEN}: Generalized and Robust {3D} Detection in Driving via Controllable Text-to-Image Diffusion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27497--27507},
}
DRiVE: Diffusion-based Rigging Empowers Generation of Versatile and Expressive Characters: Mingze Sun,

Junhao Chen,

Junting Dong,

Yurun Chen,

Xinyu Jiang,

Shiwei Mao,

Puhua Jiang,

Jingbo Wang,

Bo Dai,

Ruqi Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Mingze and Chen, Junhao and Dong, Junting and Chen, Yurun and Jiang, Xinyu and Mao, Shiwei and Jiang, Puhua and Wang, Jingbo and Dai, Bo and Huang, Ruqi},
  title     = {{DRiVE}: Diffusion-based Rigging Empowers Generation of Versatile and Expressive Characters},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21170--21180},
}
A Unified Approach to Interpreting Self-supervised Pre-training Methods for 3D Point Clouds via Interactions: Qiang Li,

Jian Ruan,

Fanghao Wu,

Yuchi Chen,

Zhihua Wei,

Wen Shen; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Qiang and Ruan, Jian and Wu, Fanghao and Chen, Yuchi and Wei, Zhihua and Shen, Wen},
  title     = {A Unified Approach to Interpreting Self-supervised Pre-training Methods for {3D} Point Clouds via Interactions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27315--27324},
}
Enhancing SAM with Efficient Prompting and Preference Optimization for Semi-supervised Medical Image Segmentation: Aishik Konwer,

Zhijian Yang,

Erhan Bas,

Cao Xiao,

Prateek Prasanna,

Parminder Bhatia,

Taha Kass-Hout; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Konwer_2025_CVPR,
  author    = {Konwer, Aishik and Yang, Zhijian and Bas, Erhan and Xiao, Cao and Prasanna, Prateek and Bhatia, Parminder and Kass-Hout, Taha},
  title     = {Enhancing {SAM} with Efficient Prompting and Preference Optimization for Semi-supervised Medical Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20990--21000},
}
STEREO: A Two-Stage Framework for Adversarially Robust Concept Erasing from Text-to-Image Diffusion Models: Koushik Srivatsan,

Fahad Shamshad,

Muzammal Naseer,

Vishal M. Patel,

Karthik Nandakumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Srivatsan_2025_CVPR,
  author    = {Srivatsan, Koushik and Shamshad, Fahad and Naseer, Muzammal and Patel, Vishal M. and Nandakumar, Karthik},
  title     = {{STEREO}: A Two-Stage Framework for Adversarially Robust Concept Erasing from Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23765--23774},
}
GenVDM: Generating Vector Displacement Maps From a Single Image: Yuezhi Yang,

Qimin Chen,

Vladimir G. Kim,

Siddhartha Chaudhuri,

Qixing Huang,

Zhiqin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Yuezhi and Chen, Qimin and Kim, Vladimir G. and Chaudhuri, Siddhartha and Huang, Qixing and Chen, Zhiqin},
  title     = {{GenVDM}: Generating Vector Displacement Maps From a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26618--26629},
}
Effective SAM Combination for Open-Vocabulary Semantic Segmentation: Minhyeok Lee,

Suhwan Cho,

Jungho Lee,

Sunghun Yang,

Heeseung Choi,

Ig-Jae Kim,

Sangyoun Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Minhyeok and Cho, Suhwan and Lee, Jungho and Yang, Sunghun and Choi, Heeseung and Kim, Ig-Jae and Lee, Sangyoun},
  title     = {Effective {SAM} Combination for Open-Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26081--26090},
}
Towards Visual Discrimination and Reasoning of Real-World Physical Dynamics: Physics-Grounded Anomaly Detection: Wenqiao Li,

Yao Gu,

Xintao Chen,

Xiaohao Xu,

Ming Hu,

Xiaonan Huang,

Yingna Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Wenqiao and Gu, Yao and Chen, Xintao and Xu, Xiaohao and Hu, Ming and Huang, Xiaonan and Wu, Yingna},
  title     = {Towards Visual Discrimination and Reasoning of Real-World Physical Dynamics: Physics-Grounded Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30409--30419},
}
UniAP: Unifying Inter- and Intra-Layer Automatic Parallelism by Mixed Integer Quadratic Programming: Hao Lin,

Ke Wu,

Jie Li,

Jun Li,

Wu-Jun Li; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Hao and Wu, Ke and Li, Jie and Li, Jun and Li, Wu-Jun},
  title     = {{UniAP}: Unifying Inter- and Intra-Layer Automatic Parallelism by Mixed Integer Quadratic Programming},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20947--20957},
}
Turbo3D: Ultra-fast Text-to-3D Generation: Hanzhe Hu,

Tianwei Yin,

Fujun Luan,

Yiwei Hu,

Hao Tan,

Zexiang Xu,

Sai Bi,

Shubham Tulsiani,

Kai Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Hanzhe and Yin, Tianwei and Luan, Fujun and Hu, Yiwei and Tan, Hao and Xu, Zexiang and Bi, Sai and Tulsiani, Shubham and Zhang, Kai},
  title     = {{Turbo3D}: Ultra-fast Text-to-{3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23668--23678},
}
SUM Parts: Benchmarking Part-Level Semantic Segmentation of Urban Meshes: Weixiao Gao,

Liangliang Nan,

Hugo Ledoux; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Weixiao and Nan, Liangliang and Ledoux, Hugo},
  title     = {{SUM} Parts: Benchmarking Part-Level Semantic Segmentation of Urban Meshes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24474--24484},
}
MODA: Motion-Drift Augmentation for Inertial Human Motion Analysis: Yinghao Wu,

Shihui Guo,

Yipeng Qin; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Yinghao and Guo, Shihui and Qin, Yipeng},
  title     = {{MODA}: Motion-Drift Augmentation for Inertial Human Motion Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27771--27781},
}
Higher-Order Ratio Cycles for Fast and Globally Optimal Shape Matching: Paul Roetzer,

Viktoria Ehm,

Daniel Cremers,

Zorah Lähner,

Florian Bernard; [pdf] [supp]
[bibtex]
@InProceedings{Roetzer_2025_CVPR,
  author    = {Roetzer, Paul and Ehm, Viktoria and Cremers, Daniel and L{\"a}hner, Zorah and Bernard, Florian},
  title     = {Higher-Order Ratio Cycles for Fast and Globally Optimal Shape Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21793--21803},
}
Hyperdimensional Uncertainty Quantification for Multimodal Uncertainty Fusion in Autonomous Vehicles Perception: Luke Chen,

Junyao Wang,

Trier Mortlock,

Pramod Khargonekar,

Mohammad Abdullah Al Faruque; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Luke and Wang, Junyao and Mortlock, Trier and Khargonekar, Pramod and Al Faruque, Mohammad Abdullah},
  title     = {Hyperdimensional Uncertainty Quantification for Multimodal Uncertainty Fusion in Autonomous Vehicles Perception},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22306--22316},
}
GIFStream: 4D Gaussian-based Immersive Video with Feature Stream: Hao Li,

Sicheng Li,

Xiang Gao,

Abudouaihati Batuer,

Lu Yu,

Yiyi Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Hao and Li, Sicheng and Gao, Xiang and Batuer, Abudouaihati and Yu, Lu and Liao, Yiyi},
  title     = {{GIFStream}: {4D} {Gaussian}-based Immersive Video with Feature Stream},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21761--21770},
}
Multi-Scale Neighborhood Occupancy Masked Autoencoder for Self-Supervised Learning in LiDAR Point Clouds: Mohamed Abdelsamad,

Michael Ulrich,

Claudius Glaeser,

Abhinav Valada; [pdf] [supp]
[bibtex]
@InProceedings{Abdelsamad_2025_CVPR,
  author    = {Abdelsamad, Mohamed and Ulrich, Michael and Glaeser, Claudius and Valada, Abhinav},
  title     = {Multi-Scale Neighborhood Occupancy Masked Autoencoder for Self-Supervised Learning in {LiDAR} Point Clouds},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22234--22243},
}
PLeaS - Merging Models with Permutations and Least Squares: Anshul Nasery,

Jonathan Hayase,

Pang Wei Koh,

Sewoong Oh; [pdf] [supp]
[bibtex]
@InProceedings{Nasery_2025_CVPR,
  author    = {Nasery, Anshul and Hayase, Jonathan and Koh, Pang Wei and Oh, Sewoong},
  title     = {{PLeaS} - Merging Models with Permutations and Least Squares},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30493--30502},
}
Incremental Object Keypoint Learning: Mingfu Liang,

Jiahuan Zhou,

Xu Zou,

Ying Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Mingfu and Zhou, Jiahuan and Zou, Xu and Wu, Ying},
  title     = {Incremental Object Keypoint Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25399--25410},
}
InteractVLM: 3D Interaction Reasoning from 2D Foundational Models: Sai Kumar Dwivedi,

Dimitrije Antić,

Shashank Tripathi,

Omid Taheri,

Cordelia Schmid,

Michael J. Black,

Dimitrios Tzionas; [pdf] [supp]
[bibtex]
@InProceedings{Dwivedi_2025_CVPR,
    author    = {Dwivedi, Sai Kumar and Anti{\'c}, Dimitrije and Tripathi, Shashank and Taheri, Omid and Schmid, Cordelia and Black, Michael J. and Tzionas, Dimitrios},
    title     = {{InteractVLM}: {3D} Interaction Reasoning from {2D} Foundational Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {22605--22615},
}
Attribute-Missing Multi-view Graph Clustering: Bowen Zhao,

Qianqian Wang,

Zhengming Ding,

Quanxue Gao; [pdf]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
    author    = {Zhao, Bowen and Wang, Qianqian and Ding, Zhengming and Gao, Quanxue},
    title     = {Attribute-Missing Multi-view Graph Clustering},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {25832--25841},
}
Pose-Guided Temporal Enhancement for Robust Low-Resolution Hand Reconstruction: Kaixin Fan,

Pengfei Ren,

Jingyu Wang,

Haifeng Sun,

Qi Qi,

Zirui Zhuang,

Jianxin Liao; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2025_CVPR,
    author    = {Fan, Kaixin and Ren, Pengfei and Wang, Jingyu and Sun, Haifeng and Qi, Qi and Zhuang, Zirui and Liao, Jianxin},
    title     = {Pose-Guided Temporal Enhancement for Robust Low-Resolution Hand Reconstruction},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {22627--22637},
}
ReDiffDet: Rotation-equivariant Diffusion Model for Oriented Object Detection: Jiaqi Zhao,

Zeyu Ding,

Yong Zhou,

Hancheng Zhu,

Wen-Liang Du,

Rui Yao; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
    author    = {Zhao, Jiaqi and Ding, Zeyu and Zhou, Yong and Zhu, Hancheng and Du, Wen-Liang and Yao, Rui},
    title     = {{ReDiffDet}: Rotation-equivariant Diffusion Model for Oriented Object Detection},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {24429--24439},
}
Unlocking Generalization Power in LiDAR Point Cloud Registration: Zhenxuan Zeng,

Qiao Wu,

Xiyu Zhang,

Lin Yuanbo Wu,

Pei An,

Jiaqi Yang,

Ji Wang,

Peng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2025_CVPR,
    author    = {Zeng, Zhenxuan and Wu, Qiao and Zhang, Xiyu and Wu, Lin Yuanbo and An, Pei and Yang, Jiaqi and Wang, Ji and Wang, Peng},
    title     = {Unlocking Generalization Power in {LiDAR} Point Cloud Registration},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {22244--22253},
}
LoRA Recycle: Unlocking Tuning-Free Few-Shot Adaptability in Visual Foundation Models by Recycling Pre-Tuned LoRAs: Zixuan Hu,

Yongxian Wei,

Li Shen,

Chun Yuan,

Dacheng Tao; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2025_CVPR,
    author    = {Hu, Zixuan and Wei, Yongxian and Shen, Li and Yuan, Chun and Tao, Dacheng},
    title     = {{LoRA} Recycle: Unlocking Tuning-Free Few-Shot Adaptability in Visual Foundation Models by Recycling Pre-Tuned {LoRAs}},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {25026--25037},
}
Event Fields: Capturing Light Fields at High Speed, Resolution, and Dynamic Range: Ziyuan Qu,

Zihao Zou,

Vivek Boominathan,

Praneeth Chakravarthula,

Adithya Pediredla; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_CVPR,
    author    = {Qu, Ziyuan and Zou, Zihao and Boominathan, Vivek and Chakravarthula, Praneeth and Pediredla, Adithya},
    title     = {Event Fields: Capturing Light Fields at High Speed, Resolution, and Dynamic Range},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {26910--26920},
}
HyperFree: A Channel-adaptive and Tuning-free Foundation Model for Hyperspectral Remote Sensing Imagery: Jingtao Li,

Yingyi Liu,

Xinyu Wang,

Yunning Peng,

Chen Sun,

Shaoyu Wang,

Zhendong Sun,

Tian Ke,

Xiao Jiang,

Tangwei Lu,

Anran Zhao,

Yanfei Zhong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Jingtao and Liu, Yingyi and Wang, Xinyu and Peng, Yunning and Sun, Chen and Wang, Shaoyu and Sun, Zhendong and Ke, Tian and Jiang, Xiao and Lu, Tangwei and Zhao, Anran and Zhong, Yanfei},
    title     = {{HyperFree}: A Channel-adaptive and Tuning-free Foundation Model for Hyperspectral Remote Sensing Imagery},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {23048--23058},
}
GBlobs: Explicit Local Structure via Gaussian Blobs for Improved Cross-Domain LiDAR-based 3D Object Detection: Dušan Malić,

Christian Fruhwirth-Reisinger,

Samuel Schulter,

Horst Possegger; [pdf] [supp]
[bibtex]
@InProceedings{Malic_2025_CVPR,
    author    = {Mali{\'c}, Du{\v{s}}an and Fruhwirth-Reisinger, Christian and Schulter, Samuel and Possegger, Horst},
    title     = {{GBlobs}: Explicit Local Structure via {Gaussian} Blobs for Improved Cross-Domain {LiDAR}-based {3D} Object Detection},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {27357--27367},
}
3D Gaussian Head Avatars with Expressive Dynamic Appearances by Compact Tensorial Representations: Yating Wang,

Xuan Wang,

Ran Yi,

Yanbo Fan,

Jichen Hu,

Jingcheng Zhu,

Lizhuang Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Yating and Wang, Xuan and Yi, Ran and Fan, Yanbo and Hu, Jichen and Zhu, Jingcheng and Ma, Lizhuang},
    title     = {{3D} {Gaussian} Head Avatars with Expressive Dynamic Appearances by Compact Tensorial Representations},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {21117--21126},
}
MambaIRv2: Attentive State Space Restoration: Hang Guo,

Yong Guo,

Yaohua Zha,

Yulun Zhang,

Wenbo Li,

Tao Dai,

Shu-Tao Xia,

Yawei Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
    author    = {Guo, Hang and Guo, Yong and Zha, Yaohua and Zhang, Yulun and Li, Wenbo and Dai, Tao and Xia, Shu-Tao and Li, Yawei},
    title     = {{MambaIRv2}: Attentive State Space Restoration},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {28124--28133},
}
Floating No More: Object-Ground Reconstruction from a Single Image: Yunze Man,

Yichen Sheng,

Jianming Zhang,

Liang-Yan Gui,

Yu-Xiong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Man_2025_CVPR,
    author    = {Man, Yunze and Sheng, Yichen and Zhang, Jianming and Gui, Liang-Yan and Wang, Yu-Xiong},
    title     = {Floating No More: Object-Ground Reconstruction from a Single Image},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {27134--27143},
}
Pattern Analogies: Learning to Perform Programmatic Image Edits by Analogy: Aditya Ganeshan,

Thibault Groueix,

Paul Guerrero,

Radomir Mech,

Matthew Fisher,

Daniel Ritchie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ganeshan_2025_CVPR,
    author    = {Ganeshan, Aditya and Groueix, Thibault and Guerrero, Paul and Mech, Radomir and Fisher, Matthew and Ritchie, Daniel},
    title     = {Pattern Analogies: Learning to Perform Programmatic Image Edits by Analogy},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {28715--28725},
}
STAR-Edge: Structure-aware Local Spherical Curve Representation for Thin-walled Edge Extraction from Unstructured Point Clouds: Zikuan Li,

Honghua Chen,

Yuecheng Wang,

Sibo Wu,

Mingqiang Wei,

Jun Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Zikuan and Chen, Honghua and Wang, Yuecheng and Wu, Sibo and Wei, Mingqiang and Wang, Jun},
    title     = {{STAR-Edge}: Structure-aware Local Spherical Curve Representation for Thin-walled Edge Extraction from Unstructured Point Clouds},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {27254--27263},
}
Boosting the Dual-Stream Architecture in Ultra-High Resolution Segmentation with Resolution-Biased Uncertainty Estimation: Rong Qin,

Xingyu Liu,

Jinglei Shi,

Liang Lin,

Jufeng Yang; [pdf]
[bibtex]
@InProceedings{Qin_2025_CVPR,
    author    = {Qin, Rong and Liu, Xingyu and Shi, Jinglei and Lin, Liang and Yang, Jufeng},
    title     = {Boosting the Dual-Stream Architecture in Ultra-High Resolution Segmentation with Resolution-Biased Uncertainty Estimation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {25960--25970},
}
pFedMxF: Personalized Federated Class-Incremental Learning with Mixture of Frequency Aggregation: Yifei Zhang,

Hao Zhu,

Alysa Ziying Tan,

Dianzhi Yu,

Longtao Huang,

Han Yu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
    author    = {Zhang, Yifei and Zhu, Hao and Tan, Alysa Ziying and Yu, Dianzhi and Huang, Longtao and Yu, Han},
    title     = {{pFedMxF}: Personalized Federated Class-Incremental Learning with Mixture of Frequency Aggregation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {30640--30650},
}
Efficient Transfer Learning for Video-language Foundation Models: Haoxing Chen,

Zizheng Huang,

Yan Hong,

Yanshuo Wang,

Zhongcai Lyu,

Zhuoer Xu,

Jun Lan,

Zhangxuan Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Haoxing and Huang, Zizheng and Hong, Yan and Wang, Yanshuo and Lyu, Zhongcai and Xu, Zhuoer and Lan, Jun and Gu, Zhangxuan},
    title     = {Efficient Transfer Learning for Video-language Foundation Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {29129--29138},
}
Radio Frequency Ray Tracing with Neural Object Representation for Enhanced RF Modeling: Xingyu Chen,

Zihao Feng,

Kun Qian,

Xinyu Zhang; [pdf]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Xingyu and Feng, Zihao and Qian, Kun and Zhang, Xinyu},
    title     = {Radio Frequency Ray Tracing with Neural Object Representation for Enhanced {RF} Modeling},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {21339--21348},
}
Neuro-3D: Towards 3D Visual Decoding from EEG Signals: Zhanqiang Guo,

Jiamin Wu,

Yonghao Song,

Jiahui Bu,

Weijian Mai,

Qihao Zheng,

Wanli Ouyang,

Chunfeng Song; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2025_CVPR,
    author    = {Guo, Zhanqiang and Wu, Jiamin and Song, Yonghao and Bu, Jiahui and Mai, Weijian and Zheng, Qihao and Ouyang, Wanli and Song, Chunfeng},
    title     = {{Neuro-3D}: Towards {3D} Visual Decoding from {EEG} Signals},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {23870--23880},
}
Probing the Mid-level Vision Capabilities of Self-Supervised Learning: Xuweiyi Chen,

Markus Marks,

Zezhou Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Xuweiyi and Marks, Markus and Cheng, Zezhou},
    title     = {Probing the Mid-level Vision Capabilities of Self-Supervised Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {30095--30105},
}
Efficient Long Video Tokenization via Coordinate-based Patch Reconstruction: Huiwon Jang,

Sihyun Yu,

Jinwoo Shin,

Pieter Abbeel,

Younggyo Seo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2025_CVPR,
    author    = {Jang, Huiwon and Yu, Sihyun and Shin, Jinwoo and Abbeel, Pieter and Seo, Younggyo},
    title     = {Efficient Long Video Tokenization via Coordinate-based Patch Reconstruction},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {22853--22863},
}
Derivative-Free Diffusion Manifold-Constrained Gradient for Unified XAI: Won Jun Kim,

Hyungjin Chung,

Jaemin Kim,

Sangmin Lee,

Byeongsu Sim,

Jong Chul Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2025_CVPR,
    author    = {Kim, Won Jun and Chung, Hyungjin and Kim, Jaemin and Lee, Sangmin and Sim, Byeongsu and Ye, Jong Chul},
    title     = {Derivative-Free Diffusion Manifold-Constrained Gradient for Unified {XAI}},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {23795--23805},
}
ZoomLDM: Latent Diffusion Model for Multi-scale Image Generation: Srikar Yellapragada,

Alexandros Graikos,

Kostas Triaridis,

Prateek Prasanna,

Rajarsi Gupta,

Joel Saltz,

Dimitris Samaras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yellapragada_2025_CVPR,
    author    = {Yellapragada, Srikar and Graikos, Alexandros and Triaridis, Kostas and Prasanna, Prateek and Gupta, Rajarsi and Saltz, Joel and Samaras, Dimitris},
    title     = {{ZoomLDM}: Latent Diffusion Model for Multi-scale Image Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {23453--23463},
}
GaussianUDF: Inferring Unsigned Distance Functions through 3D Gaussian Splatting: Shujuan Li,

Yu-Shen Liu,

Zhizhong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Shujuan and Liu, Yu-Shen and Han, Zhizhong},
    title     = {{GaussianUDF}: Inferring Unsigned Distance Functions through {3D} {Gaussian} Splatting},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {27113--27123},
}
CrossSDF: 3D Reconstruction of Thin Structures From Cross-Sections: Thomas Walker,

Salvatore Esposito,

Daniel Rebain,

Amir Vaxman,

Arno Onken,

Changjian Li,

Oisin Mac Aodha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Walker_2025_CVPR,
    author    = {Walker, Thomas and Esposito, Salvatore and Rebain, Daniel and Vaxman, Amir and Onken, Arno and Li, Changjian and Mac Aodha, Oisin},
    title     = {{CrossSDF}: {3D} Reconstruction of Thin Structures From Cross-Sections},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {30928--30937},
}
DV-Matcher: Deformation-based Non-rigid Point Cloud Matching Guided by Pre-trained Visual Features: Zhangquan Chen,

Puhua Jiang,

Ruqi Huang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Zhangquan and Jiang, Puhua and Huang, Ruqi},
    title     = {{DV-Matcher}: Deformation-based Non-rigid Point Cloud Matching Guided by Pre-trained Visual Features},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {27264--27274},
}
Reasoning Mamba: Hypergraph-Guided Region Relation Calculating for Weakly Supervised Affordance Grounding: Yuxuan Wang,

Aming Wu,

Muli Yang,

Yukuan Min,

Yihang Zhu,

Cheng Deng; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Yuxuan and Wu, Aming and Yang, Muli and Min, Yukuan and Zhu, Yihang and Deng, Cheng},
    title     = {Reasoning {Mamba}: Hypergraph-Guided Region Relation Calculating for Weakly Supervised Affordance Grounding},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {27618--27627},
}
Adaptive Part Learning for Fine-Grained Generalized Category Discovery: A Plug-and-Play Enhancement: Qiyuan Dai,

Hanzhuo Huang,

Yu Wu,

Sibei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dai_2025_CVPR,
    author    = {Dai, Qiyuan and Huang, Hanzhuo and Wu, Yu and Yang, Sibei},
    title     = {Adaptive Part Learning for Fine-Grained Generalized Category Discovery: A Plug-and-Play Enhancement},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {25444--25453},
}
FLAIR: VLM with Fine-grained Language-informed Image Representations: Rui Xiao,

Sanghwan Kim,

Mariana-Iuliana Georgescu,

Zeynep Akata,

Stephan Alaniz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
    author    = {Xiao, Rui and Kim, Sanghwan and Georgescu, Mariana-Iuliana and Akata, Zeynep and Alaniz, Stephan},
    title     = {{FLAIR}: {VLM} with Fine-grained Language-informed Image Representations},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {24884--24894},
}
GG-SSMs: Graph-Generating State Space Models: Nikola Zubic,

Davide Scaramuzza; [pdf] [supp]
[bibtex]
@InProceedings{Zubic_2025_CVPR,
    author    = {Zubic, Nikola and Scaramuzza, Davide},
    title     = {{GG-SSMs}: Graph-Generating State Space Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {28863--28873},
}
Continuous Adverse Weather Removal via Degradation-Aware Distillation: Xin Lu,

Jie Xiao,

Yurui Zhu,

Xueyang Fu; [pdf]
[bibtex]
@InProceedings{Lu_2025_CVPR,
    author    = {Lu, Xin and Xiao, Jie and Zhu, Yurui and Fu, Xueyang},
    title     = {Continuous Adverse Weather Removal via Degradation-Aware Distillation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {28113--28123},
}
Exploiting Temporal State Space Sharing for Video Semantic Segmentation: Syed Ariff Syed Hesham,

Yun Liu,

Guolei Sun,

Henghui Ding,

Jing Yang,

Ender Konukoglu,

Xue Geng,

Xudong Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hesham_2025_CVPR,
    author    = {Hesham, Syed Ariff Syed and Liu, Yun and Sun, Guolei and Ding, Henghui and Yang, Jing and Konukoglu, Ender and Geng, Xue and Jiang, Xudong},
    title     = {Exploiting Temporal State Space Sharing for Video Semantic Segmentation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {24211--24221},
}
High-fidelity 3D Object Generation from Single Image with RGBN-Volume Gaussian Reconstruction Model: Yiyang Shen,

Kun Zhou,

He Wang,

Yin Yang,

Tianjia Shao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2025_CVPR,
    author    = {Shen, Yiyang and Zhou, Kun and Wang, He and Yang, Yin and Shao, Tianjia},
    title     = {High-fidelity {3D} Object Generation from Single Image with {RGBN}-Volume {Gaussian} Reconstruction Model},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {21558--21569},
}
Steepest Descent Density Control for Compact 3D Gaussian Splatting: Peihao Wang,

Yuehao Wang,

Dilin Wang,

Sreyas Mohan,

Zhiwen Fan,

Lemeng Wu,

Ruisi Cai,

Yu-Ying Yeh,

Zhangyang Wang,

Qiang Liu,

Rakesh Ranjan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Peihao and Wang, Yuehao and Wang, Dilin and Mohan, Sreyas and Fan, Zhiwen and Wu, Lemeng and Cai, Ruisi and Yeh, Yu-Ying and Wang, Zhangyang and Liu, Qiang and Ranjan, Rakesh},
    title     = {Steepest Descent Density Control for Compact {3D} {Gaussian} Splatting},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {26663--26672},
}
Optimal Transport-Guided Source-Free Adaptation for Face Anti-Spoofing: Zhuowei Li,

Tianchen Zhao,

Xiang Xu,

Zheng Zhang,

Zhihua Li,

Xuanbai Chen,

Qin Zhang,

Alessandro Bergamo,

Anil K. Jain,

Yifan Xing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
    author    = {Li, Zhuowei and Zhao, Tianchen and Xu, Xiang and Zhang, Zheng and Li, Zhihua and Chen, Xuanbai and Zhang, Qin and Bergamo, Alessandro and Jain, Anil K. and Xing, Yifan},
    title     = {Optimal Transport-Guided Source-Free Adaptation for Face Anti-Spoofing},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {24351--24363},
}
Robust 3D Shape Reconstruction in Zero-Shot from a Single Image in the Wild: Junhyeong Cho,

Kim Youwang,

Hunmin Yang,

Tae-Hyun Oh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cho_2025_CVPR,
    author    = {Cho, Junhyeong and Youwang, Kim and Yang, Hunmin and Oh, Tae-Hyun},
    title     = {Robust {3D} Shape Reconstruction in Zero-Shot from a Single Image in the Wild},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {22786--22798},
}
BOE-ViT: Boosting Orientation Estimation with Equivariance in Self-Supervised 3D Subtomogram Alignment: Runmin Jiang,

Jackson Daggett,

Shriya Pingulkar,

Yizhou Zhao,

Priyanshu Dhingra,

Daniel Brown,

Qifeng Wu,

Xiangrui Zeng,

Xingjian Li,

Min Xu; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
    author    = {Jiang, Runmin and Daggett, Jackson and Pingulkar, Shriya and Zhao, Yizhou and Dhingra, Priyanshu and Brown, Daniel and Wu, Qifeng and Zeng, Xiangrui and Li, Xingjian and Xu, Min},
    title     = {{BOE-ViT}: Boosting Orientation Estimation with Equivariance in Self-Supervised {3D} Subtomogram Alignment},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {29352--29362},
}
Adventurer: Optimizing Vision Mamba Architecture Designs for Efficiency: Feng Wang,

Timing Yang,

Yaodong Yu,

Sucheng Ren,

Guoyizhe Wei,

Angtian Wang,

Wei Shao,

Yuyin Zhou,

Alan Yuille,

Cihang Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Feng and Yang, Timing and Yu, Yaodong and Ren, Sucheng and Wei, Guoyizhe and Wang, Angtian and Shao, Wei and Zhou, Yuyin and Yuille, Alan and Xie, Cihang},
    title     = {Adventurer: Optimizing Vision {Mamba} Architecture Designs for Efficiency},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {30157--30166},
}
Beyond Local Sharpness: Communication-Efficient Global Sharpness-aware Minimization for Federated Learning: Debora Caldarola,

Pietro Cagnasso,

Barbara Caputo,

Marco Ciccone; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Caldarola_2025_CVPR,
    author    = {Caldarola, Debora and Cagnasso, Pietro and Caputo, Barbara and Ciccone, Marco},
    title     = {Beyond Local Sharpness: Communication-Efficient Global Sharpness-aware Minimization for Federated Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {25187--25197},
}
Parameterized Blur Kernel Prior Learning for Local Motion Deblurring: Zhenxuan Fang,

Fangfang Wu,

Tao Huang,

Le Dong,

Weisheng Dong,

Xin Li,

Guangming Shi; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2025_CVPR,
    author    = {Fang, Zhenxuan and Wu, Fangfang and Huang, Tao and Dong, Le and Dong, Weisheng and Li, Xin and Shi, Guangming},
    title     = {Parameterized Blur Kernel Prior Learning for Local Motion Deblurring},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {23006--23015},
}
Scene4U: Hierarchical Layered 3D Scene Reconstruction from Single Panoramic Image for Your Immerse Exploration: Zilong Huang,

Jun He,

Junyan Ye,

Lihan Jiang,

Weijia Li,

Yiping Chen,

Ting Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
    author    = {Huang, Zilong and He, Jun and Ye, Junyan and Jiang, Lihan and Li, Weijia and Chen, Yiping and Han, Ting},
    title     = {{Scene4U}: Hierarchical Layered {3D} Scene Reconstruction from Single Panoramic Image for Your Immerse Exploration},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {26723--26733},
}
ACAttack: Adaptive Cross Attacking RGB-T Tracker via Multi-Modal Response Decoupling: Xinyu Xiang,

Qinglong Yan,

Hao Zhang,

Jiayi Ma; [pdf]
[bibtex]
@InProceedings{Xiang_2025_CVPR,
    author    = {Xiang, Xinyu and Yan, Qinglong and Zhang, Hao and Ma, Jiayi},
    title     = {{ACAttack}: Adaptive Cross Attacking {RGB-T} Tracker via Multi-Modal Response Decoupling},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {22099--22108},
}
DeCafNet: Delegate and Conquer for Efficient Temporal Grounding in Long Videos: Zijia Lu,

A S M Iftekhar,

Gaurav Mittal,

Tianjian Meng,

Xiawei Wang,

Cheng Zhao,

Rohith Kukkala,

Ehsan Elhamifar,

Mei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
    author    = {Lu, Zijia and Iftekhar, A S M and Mittal, Gaurav and Meng, Tianjian and Wang, Xiawei and Zhao, Cheng and Kukkala, Rohith and Elhamifar, Ehsan and Chen, Mei},
    title     = {{DeCafNet}: Delegate and Conquer for Efficient Temporal Grounding in Long Videos},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {24066--24076},
}
HoGS: Unified Near and Far Object Reconstruction via Homogeneous Gaussian Splatting: Xinpeng Liu,

Zeyi Huang,

Fumio Okura,

Yasuyuki Matsushita; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Xinpeng and Huang, Zeyi and Okura, Fumio and Matsushita, Yasuyuki},
    title     = {{HoGS}: Unified Near and Far Object Reconstruction via Homogeneous {Gaussian} Splatting},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {26714--26722},
}
SmartEraser: Remove Anything from Images using Masked-Region Guidance: Longtao Jiang,

Zhendong Wang,

Jianmin Bao,

Wengang Zhou,

Dongdong Chen,

Lei Shi,

Dong Chen,

Houqiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
    author    = {Jiang, Longtao and Wang, Zhendong and Bao, Jianmin and Zhou, Wengang and Chen, Dongdong and Shi, Lei and Chen, Dong and Li, Houqiang},
    title     = {{SmartEraser}: Remove Anything from Images using Masked-Region Guidance},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {24452--24462},
}
Sample- and Parameter-Efficient Auto-Regressive Image Models: Elad Amrani,

Leonid Karlinsky,

Alex Bronstein; [pdf] [supp]
[bibtex]
@InProceedings{Amrani_2025_CVPR,
    author    = {Amrani, Elad and Karlinsky, Leonid and Bronstein, Alex},
    title     = {Sample- and Parameter-Efficient Auto-Regressive Image Models},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {30127--30136},
}
Robust Audio-Visual Segmentation via Audio-Guided Visual Convergent Alignment: Chen Liu,

Peike Li,

Liying Yang,

Dadong Wang,

Lincheng Li,

Xin Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
    author    = {Liu, Chen and Li, Peike and Yang, Liying and Wang, Dadong and Li, Lincheng and Yu, Xin},
    title     = {Robust Audio-Visual Segmentation via Audio-Guided Visual Convergent Alignment},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {28922--28931},
}
BiLoRA: Almost-Orthogonal Parameter Spaces for Continual Learning: Hao Zhu,

Yifei Zhang,

Junhao Dong,

Piotr Koniusz; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
    author    = {Zhu, Hao and Zhang, Yifei and Dong, Junhao and Koniusz, Piotr},
    title     = {{BiLoRA}: Almost-Orthogonal Parameter Spaces for Continual Learning},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {25613--25622},
}
Vid2Sim: Generalizable, Video-based Reconstruction of Appearance, Geometry and Physics for Mesh-free Simulation: Chuhao Chen,

Zhiyang Dou,

Chen Wang,

Yiming Huang,

Anjun Chen,

Qiao Feng,

Jiatao Gu,

Lingjie Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Chuhao and Dou, Zhiyang and Wang, Chen and Huang, Yiming and Chen, Anjun and Feng, Qiao and Gu, Jiatao and Liu, Lingjie},
    title     = {{Vid2Sim}: Generalizable, Video-based Reconstruction of Appearance, Geometry and Physics for Mesh-free Simulation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {26545--26555},
}
SceneTAP: Scene-Coherent Typographic Adversarial Planner against Vision-Language Models in Real-World Environments: Yue Cao,

Yun Xing,

Jie Zhang,

Di Lin,

Tianwei Zhang,

Ivor Tsang,

Yang Liu,

Qing Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2025_CVPR,
    author    = {Cao, Yue and Xing, Yun and Zhang, Jie and Lin, Di and Zhang, Tianwei and Tsang, Ivor and Liu, Yang and Guo, Qing},
    title     = {{SceneTAP}: Scene-Coherent Typographic Adversarial Planner against Vision-Language Models in Real-World Environments},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {25050--25059},
}
Collaborative Decoding Makes Visual Auto-Regressive Modeling Efficient: Zigeng Chen,

Xinyin Ma,

Gongfan Fang,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
    author    = {Chen, Zigeng and Ma, Xinyin and Fang, Gongfan and Wang, Xinchao},
    title     = {Collaborative Decoding Makes Visual Auto-Regressive Modeling Efficient},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {23334--23344},
}
AerialMegaDepth: Learning Aerial-Ground Reconstruction and View Synthesis: Khiem Vuong,

Anurag Ghosh,

Deva Ramanan,

Srinivasa Narasimhan,

Shubham Tulsiani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vuong_2025_CVPR,
    author    = {Vuong, Khiem and Ghosh, Anurag and Ramanan, Deva and Narasimhan, Srinivasa and Tulsiani, Shubham},
    title     = {{AerialMegaDepth}: Learning Aerial-Ground Reconstruction and View Synthesis},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {21674--21684},
}
Visual Representation Learning through Causal Intervention for Controllable Image Editing: Shanshan Huang,

Haoxuan Li,

Chunyuan Zheng,

Lei Wang,

Guorui Liao,

Zhili Gong,

Huayi Yang,

Li Liu; [pdf]
[bibtex]
@InProceedings{Huang_2025_CVPR,
    author    = {Huang, Shanshan and Li, Haoxuan and Zheng, Chunyuan and Wang, Lei and Liao, Guorui and Gong, Zhili and Yang, Huayi and Liu, Li},
    title     = {Visual Representation Learning through Causal Intervention for Controllable Image Editing},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {23484--23493},
}
Exploring the Deep Fusion of Large Language Models and Diffusion Transformers for Text-to-Image Synthesis: Bingda Tang,

Boyang Zheng,

Sayak Paul,

Saining Xie; [pdf] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
    author    = {Tang, Bingda and Zheng, Boyang and Paul, Sayak and Xie, Saining},
    title     = {Exploring the Deep Fusion of Large Language Models and Diffusion Transformers for Text-to-Image Synthesis},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {28586--28595},
}
A Comprehensive Study of Decoder-Only LLMs for Text-to-Image Generation: Andrew Z. Wang,

Songwei Ge,

Tero Karras,

Ming-Yu Liu,

Yogesh Balaji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
    author    = {Wang, Andrew Z. and Ge, Songwei and Karras, Tero and Liu, Ming-Yu and Balaji, Yogesh},
    title     = {A Comprehensive Study of Decoder-Only {LLMs} for Text-to-Image Generation},
    booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
    month     = jun,
    year      = {2025},
    pages     = {28575--28585},
}
Deformable Radial Kernel Splatting: Yi-Hua Huang,

Ming-Xian Lin,

Yang-Tian Sun,

Ziyi Yang,

Xiaoyang Lyu,

Yan-Pei Cao,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Huang_2025_CVPR is also used by other entries in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Yi-Hua and Lin, Ming-Xian and Sun, Yang-Tian and Yang, Ziyi and Lyu, Xiaoyang and Cao, Yan-Pei and Qi, Xiaojuan},
  title     = {Deformable Radial Kernel Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21513--21523},
}
Bayesian Prompt Flow Learning for Zero-Shot Anomaly Detection: Zhen Qu,

Xian Tao,

Xinyi Gong,

ShiChen Qu,

Qiyu Chen,

Zhengtao Zhang,

Xingang Wang,

Guiguang Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2025_CVPR,
  author    = {Qu, Zhen and Tao, Xian and Gong, Xinyi and Qu, ShiChen and Chen, Qiyu and Zhang, Zhengtao and Wang, Xingang and Ding, Guiguang},
  title     = {{Bayesian} Prompt Flow Learning for Zero-Shot Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30398--30408},
}
HalLoc: Token-level Localization of Hallucinations for Vision Language Models: Eunkyu Park,

Minyeong Kim,

Gunhee Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2025_CVPR,
  author    = {Park, Eunkyu and Kim, Minyeong and Kim, Gunhee},
  title     = {{HalLoc}: Token-level Localization of Hallucinations for Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29893--29903},
}
DiffPortrait360: Consistent Portrait Diffusion for 360 View Synthesis: Yuming Gu,

Phong Tran,

Yujian Zheng,

Hongyi Xu,

Heyuan Li,

Adilbek Karmanov,

Hao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2025_CVPR,
  author    = {Gu, Yuming and Tran, Phong and Zheng, Yujian and Xu, Hongyi and Li, Heyuan and Karmanov, Adilbek and Li, Hao},
  title     = {{DiffPortrait360}: Consistent Portrait Diffusion for 360 View Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26263--26273},
}
SURGEON: Memory-Adaptive Fully Test-Time Adaptation via Dynamic Activation Sparsity: Ke Ma,

Jiaqi Tang,

Bin Guo,

Fan Dang,

Sicong Liu,

Zhui Zhu,

Lei Wu,

Cheng Fang,

Ying-Cong Chen,

Zhiwen Yu,

Yunhao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2025_CVPR,
  author    = {Ma, Ke and Tang, Jiaqi and Guo, Bin and Dang, Fan and Liu, Sicong and Zhu, Zhui and Wu, Lei and Fang, Cheng and Chen, Ying-Cong and Yu, Zhiwen and Liu, Yunhao},
  title     = {{SURGEON}: Memory-Adaptive Fully Test-Time Adaptation via Dynamic Activation Sparsity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30514--30523},
}
From Slow Bidirectional to Fast Autoregressive Video Diffusion Models: Tianwei Yin,

Qiang Zhang,

Richard Zhang,

William T. Freeman,

Fredo Durand,

Eli Shechtman,

Xun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2025_CVPR,
  author    = {Yin, Tianwei and Zhang, Qiang and Zhang, Richard and Freeman, William T. and Durand, Fredo and Shechtman, Eli and Huang, Xun},
  title     = {From Slow Bidirectional to Fast Autoregressive Video Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22963--22974},
}
Noise Diffusion for Enhancing Semantic Faithfulness in Text-to-Image Synthesis: Boming Miao,

Chunxiao Li,

Xiaoxiao Wang,

Andi Zhang,

Rui Sun,

Zizhe Wang,

Yao Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Miao_2025_CVPR,
  author    = {Miao, Boming and Li, Chunxiao and Wang, Xiaoxiao and Zhang, Andi and Sun, Rui and Wang, Zizhe and Zhu, Yao},
  title     = {Noise Diffusion for Enhancing Semantic Faithfulness in Text-to-Image Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23575--23584},
}
MonoInstance: Enhancing Monocular Priors via Multi-view Instance Alignment for Neural Rendering and Reconstruction: Wenyuan Zhang,

Yixiao Yang,

Han Huang,

Liang Han,

Kanle Shi,

Yu-Shen Liu,

Zhizhong Han; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Zhang_2025_CVPR is also used by other entries in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Wenyuan and Yang, Yixiao and Huang, Han and Han, Liang and Shi, Kanle and Liu, Yu-Shen and Han, Zhizhong},
  title     = {{MonoInstance}: Enhancing Monocular Priors via Multi-view Instance Alignment for Neural Rendering and Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21642--21653},
}
CAT4D: Create Anything in 4D with Multi-View Video Diffusion Models: Rundi Wu,

Ruiqi Gao,

Ben Poole,

Alex Trevithick,

Changxi Zheng,

Jonathan T. Barron,

Aleksander Holynski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Rundi and Gao, Ruiqi and Poole, Ben and Trevithick, Alex and Zheng, Changxi and Barron, Jonathan T. and Holynski, Aleksander},
  title     = {{CAT4D}: Create Anything in {4D} with Multi-View Video Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26057--26068},
}
Exploring Semantic Feature Discrimination for Perceptual Image Super-Resolution and Opinion-Unaware No-Reference Image Quality Assessment: Guanglu Dong,

Xiangyu Liao,

Mingyang Li,

Guihuan Guo,

Chao Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2025_CVPR,
  author    = {Dong, Guanglu and Liao, Xiangyu and Li, Mingyang and Guo, Guihuan and Ren, Chao},
  title     = {Exploring Semantic Feature Discrimination for Perceptual Image Super-Resolution and Opinion-Unaware No-Reference Image Quality Assessment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28176--28187},
}
Distilling Long-tailed Datasets: Zhenghao Zhao,

Haoxuan Wang,

Yuzhang Shang,

Kai Wang,

Yan Yan; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Zhao_2025_CVPR is also used by another entry in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Zhenghao and Wang, Haoxuan and Shang, Yuzhang and Wang, Kai and Yan, Yan},
  title     = {Distilling Long-tailed Datasets},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30609--30618},
}
Gaze-LLE: Gaze Target Estimation via Large-Scale Learned Encoders: Fiona Ryan,

Ajay Bati,

Sangmin Lee,

Daniel Bolya,

Judy Hoffman,

James M. Rehg; [pdf] [supp]
[bibtex]
@InProceedings{Ryan_2025_CVPR,
  author    = {Ryan, Fiona and Bati, Ajay and Lee, Sangmin and Bolya, Daniel and Hoffman, Judy and Rehg, James M.},
  title     = {{Gaze-LLE}: Gaze Target Estimation via Large-Scale Learned Encoders},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28874--28884},
}
Incorporating Dense Knowledge Alignment into Unified Multimodal Representation Models: Yuhao Cui,

Xinxing Zu,

Wenhua Zhang,

Zhongzhou Zhao,

Jinyang Gao; [pdf] [supp]
[bibtex]
% NOTE(review): the key Cui_2025_CVPR is also used by another entry in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Cui_2025_CVPR,
  author    = {Cui, Yuhao and Zu, Xinxing and Zhang, Wenhua and Zhao, Zhongzhou and Gao, Jinyang},
  title     = {Incorporating Dense Knowledge Alignment into Unified Multimodal Representation Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29733--29743},
}
Boost Your Human Image Generation Model via Direct Preference Optimization: Sanghyeon Na,

Yonggyu Kim,

Hyunjoon Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Na_2025_CVPR,
  author    = {Na, Sanghyeon and Kim, Yonggyu and Lee, Hyunjoon},
  title     = {Boost Your Human Image Generation Model via Direct Preference Optimization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23551--23562},
}
Learning to Highlight Audio by Watching Movies: Chao Huang,

Ruohan Gao,

J. M. F. Tsang,

Jan Kurcius,

Cagdas Bilen,

Chenliang Xu,

Anurag Kumar,

Sanjeel Parekh; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Huang_2025_CVPR is also used by other entries in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Chao and Gao, Ruohan and Tsang, J. M. F. and Kurcius, Jan and Bilen, Cagdas and Xu, Chenliang and Kumar, Anurag and Parekh, Sanjeel},
  title     = {Learning to Highlight Audio by Watching Movies},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23925--23935},
}
Unified Uncertainty-Aware Diffusion for Multi-Agent Trajectory Modeling: Guillem Capellera,

Antonio Rubio,

Luis Ferraz,

Antonio Agudo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Capellera_2025_CVPR,
  author    = {Capellera, Guillem and Rubio, Antonio and Ferraz, Luis and Agudo, Antonio},
  title     = {Unified Uncertainty-Aware Diffusion for Multi-Agent Trajectory Modeling},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22476--22486},
}
WeGen: A Unified Model for Interactive Multimodal Generation as We Chat: Zhipeng Huang,

Shaobin Zhuang,

Canmiao Fu,

Binxin Yang,

Ying Zhang,

Chong Sun,

Zhizheng Zhang,

Yali Wang,

Chen Li,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Huang_2025_CVPR is also used by other entries in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zhipeng and Zhuang, Shaobin and Fu, Canmiao and Yang, Binxin and Zhang, Ying and Sun, Chong and Zhang, Zhizheng and Wang, Yali and Li, Chen and Zha, Zheng-Jun},
  title     = {{WeGen}: A Unified Model for Interactive Multimodal Generation as We Chat},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23679--23689},
}
HRAvatar: High-Quality and Relightable Gaussian Head Avatar: Dongbin Zhang,

Yunfei Liu,

Lijian Lin,

Ye Zhu,

Kangjie Chen,

Minghan Qin,

Yu Li,

Haoqian Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Zhang_2025_CVPR is also used by other entries in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Dongbin and Liu, Yunfei and Lin, Lijian and Zhu, Ye and Chen, Kangjie and Qin, Minghan and Li, Yu and Wang, Haoqian},
  title     = {{HRAvatar}: High-Quality and Relightable {Gaussian} Head Avatar},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26285--26296},
}
A Distractor-Aware Memory for Visual Object Tracking with SAM2: Jovana Videnovic,

Alan Lukezic,

Matej Kristan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Videnovic_2025_CVPR,
  author    = {Videnovic, Jovana and Lukezic, Alan and Kristan, Matej},
  title     = {A Distractor-Aware Memory for Visual Object Tracking with {SAM2}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24255--24264},
}
Activating Sparse Part Concepts for 3D Class Incremental Learning: Zhenya Tian,

Jun Xiao,

Lupeng Liu,

Haiyong Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Zhenya and Xiao, Jun and Liu, Lupeng and Jiang, Haiyong},
  title     = {Activating Sparse Part Concepts for {3D} Class Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30343--30353},
}
ProxyTransformation: Preshaping Point Cloud Manifold With Proxy Attention For 3D Visual Grounding: Qihang Peng,

Henry Zheng,

Gao Huang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Peng_2025_CVPR is also used by another entry in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Peng_2025_CVPR,
  author    = {Peng, Qihang and Zheng, Henry and Huang, Gao},
  title     = {{ProxyTransformation}: Preshaping Point Cloud Manifold With Proxy Attention For {3D} Visual Grounding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24582--24592},
}
BFANet: Revisiting 3D Semantic Segmentation with Boundary Feature Analysis: Weiguang Zhao,

Rui Zhang,

Qiufeng Wang,

Guangliang Cheng,

Kaizhu Huang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Zhao_2025_CVPR is also used by another entry in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Weiguang and Zhang, Rui and Wang, Qiufeng and Cheng, Guangliang and Huang, Kaizhu},
  title     = {{BFANet}: Revisiting {3D} Semantic Segmentation with Boundary Feature Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29395--29405},
}
Beyond Words: Augmenting Discriminative Richness via Diffusions in Unsupervised Prompt Learning: Hairui Ren,

Fan Tang,

He Zhao,

Zixuan Wang,

Dandan Guo,

Yi Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Hairui and Tang, Fan and Zhao, He and Wang, Zixuan and Guo, Dandan and Chang, Yi},
  title     = {Beyond Words: Augmenting Discriminative Richness via Diffusions in Unsupervised Prompt Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25135--25144},
}
Unlocking the Potential of Unlabeled Data in Semi-Supervised Domain Generalization: Dongkwan Lee,

Kyomin Hwang,

Nojun Kwak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Dongkwan and Hwang, Kyomin and Kwak, Nojun},
  title     = {Unlocking the Potential of Unlabeled Data in Semi-Supervised Domain Generalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30599--30608},
}
Steering Away from Harm: An Adaptive Approach to Defending Vision Language Model Against Jailbreaks: Han Wang,

Gang Wang,

Huan Zhang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Wang_2025_CVPR is also used by other entries in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Han and Wang, Gang and Zhang, Huan},
  title     = {Steering Away from Harm: An Adaptive Approach to Defending Vision Language Model Against Jailbreaks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29947--29957},
}
Neural LightRig: Unlocking Accurate Object Normal and Material Estimation with Multi-Light Diffusion: Zexin He,

Tengfei Wang,

Xin Huang,

Xingang Pan,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Zexin and Wang, Tengfei and Huang, Xin and Pan, Xingang and Liu, Ziwei},
  title     = {Neural {LightRig}: Unlocking Accurate Object Normal and Material Estimation with Multi-Light Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26514--26524},
}
Towards Natural Language-Based Document Image Retrieval: New Dataset and Benchmark: Hao Guo,

Xugong Qin,

Jun Jie Ou Yang,

Peng Zhang,

Gangyan Zeng,

Yubo Li,

Hailun Lin; [pdf] [supp]
[bibtex]
% NOTE(review): the third author is stored as "Yang, Jun Jie Ou"; the family name may actually be the compound "Ou Yang" -- confirm against the paper before changing.
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Hao and Qin, Xugong and Yang, Jun Jie Ou and Zhang, Peng and Zeng, Gangyan and Li, Yubo and Lin, Hailun},
  title     = {Towards Natural Language-Based Document Image Retrieval: New Dataset and Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29722--29732},
}
Mitigating Ambiguities in 3D Classification with Gaussian Splatting: Ruiqi Zhang,

Hao Zhu,

Jingyi Zhao,

Qi Zhang,

Xun Cao,

Zhan Ma; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Zhang_2025_CVPR is also used by other entries in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Ruiqi and Zhu, Hao and Zhao, Jingyi and Zhang, Qi and Cao, Xun and Ma, Zhan},
  title     = {Mitigating Ambiguities in {3D} Classification with {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27275--27284},
}
DH-Set: Improving Vision-Language Alignment with Diverse and Hybrid Set-Embeddings Learning: Kun Zhang,

Jingyu Li,

Zhe Li,

S.Kevin Zhou; [pdf] [supp]
[bibtex]
% NOTE(review): the key Zhang_2025_CVPR is also used by other entries in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Kun and Li, Jingyu and Li, Zhe and Zhou, S. Kevin},
  title     = {{DH-Set}: Improving Vision-Language Alignment with Diverse and Hybrid Set-Embeddings Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24993--25003},
}
Unveil Inversion and Invariance in Flow Transformer for Versatile Image Editing: Pengcheng Xu,

Boyuan Jiang,

Xiaobin Hu,

Donghao Luo,

Qingdong He,

Jiangning Zhang,

Chengjie Wang,

Yunsheng Wu,

Charles Ling,

Boyu Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Xu_2025_CVPR is also used by another entry in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Pengcheng and Jiang, Boyuan and Hu, Xiaobin and Luo, Donghao and He, Qingdong and Zhang, Jiangning and Wang, Chengjie and Wu, Yunsheng and Ling, Charles and Wang, Boyu},
  title     = {Unveil Inversion and Invariance in Flow Transformer for Versatile Image Editing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28479--28489},
}
DyCON: Dynamic Uncertainty-aware Consistency and Contrastive Learning for Semi-supervised Medical Image Segmentation: Maregu Assefa,

Muzammal Naseer,

Iyyakutti Iyappan Ganapathi,

Syed Sadaf Ali,

Mohamed L Seghier,

Naoufel Werghi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Assefa_2025_CVPR,
  author    = {Assefa, Maregu and Naseer, Muzammal and Ganapathi, Iyyakutti Iyappan and Ali, Syed Sadaf and Seghier, Mohamed L and Werghi, Naoufel},
  title     = {{DyCON}: Dynamic Uncertainty-aware Consistency and Contrastive Learning for Semi-supervised Medical Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30850--30860},
}
DUNE: Distilling a Universal Encoder from Heterogeneous 2D and 3D Teachers: Mert Bülent Sarıyıldız,

Philippe Weinzaepfel,

Thomas Lucas,

Pau de Jorge,

Diane Larlus,

Yannis Kalantidis; [pdf] [supp]
[bibtex]
@InProceedings{Sariyildiz_2025_CVPR,
  author    = {Sar{\i}y{\i}ld{\i}z, Mert B{\"u}lent and Weinzaepfel, Philippe and Lucas, Thomas and de Jorge, Pau and Larlus, Diane and Kalantidis, Yannis},
  title     = {{DUNE}: Distilling a Universal Encoder from Heterogeneous {2D} and {3D} Teachers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30084--30094},
}
Black Hole-Driven Identity Absorbing in Diffusion Models: Muhammad Shaheryar,

Jong Taek Lee,

Soon Ki Jung; [pdf] [supp]
[bibtex]
@InProceedings{Shaheryar_2025_CVPR,
  author    = {Shaheryar, Muhammad and Lee, Jong Taek and Jung, Soon Ki},
  title     = {Black Hole-Driven Identity Absorbing in Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28544--28554},
}
HiRes-LLaVA: Restoring Fragmentation Input in High-Resolution Large Vision-Language Models: Runhui Huang,

Xinpeng Ding,

Chunwei Wang,

Jianhua Han,

Yulong Liu,

Hengshuang Zhao,

Hang Xu,

Lu Hou,

Wei Zhang,

Xiaodan Liang; [pdf] [supp]
[bibtex]
% NOTE(review): the key Huang_2025_CVPR is also used by other entries in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Runhui and Ding, Xinpeng and Wang, Chunwei and Han, Jianhua and Liu, Yulong and Zhao, Hengshuang and Xu, Hang and Hou, Lu and Zhang, Wei and Liang, Xiaodan},
  title     = {{HiRes-LLaVA}: Restoring Fragmentation Input in High-Resolution Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29814--29824},
}
Hallo3: Highly Dynamic and Realistic Portrait Image Animation with Video Diffusion Transformer: Jiahao Cui,

Hui Li,

Yun Zhan,

Hanlin Shang,

Kaihui Cheng,

Yuqi Ma,

Shan Mu,

Hang Zhou,

Jingdong Wang,

Siyu Zhu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Cui_2025_CVPR is also used by another entry in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Cui_2025_CVPR,
  author    = {Cui, Jiahao and Li, Hui and Zhan, Yun and Shang, Hanlin and Cheng, Kaihui and Ma, Yuqi and Mu, Shan and Zhou, Hang and Wang, Jingdong and Zhu, Siyu},
  title     = {{Hallo3}: Highly Dynamic and Realistic Portrait Image Animation with Video Diffusion Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21086--21095},
}
SeqMvRL: A Sequential Fusion Framework for Multi-view Representation Learning: Ren Wang,

Haoliang Sun,

Yuxiu Lin,

Chuanhui Zuo,

Yongshun Gong,

Yilong Yin,

Wenjia Meng; [pdf] [supp]
[bibtex]
% NOTE(review): the key Wang_2025_CVPR is also used by other entries in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Ren and Sun, Haoliang and Lin, Yuxiu and Zuo, Chuanhui and Gong, Yongshun and Yin, Yilong and Meng, Wenjia},
  title     = {{SeqMvRL}: A Sequential Fusion Framework for Multi-view Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25822--25831},
}
BadToken: Token-level Backdoor Attacks to Multi-modal Large Language Models: Zenghui Yuan,

Jiawen Shi,

Pan Zhou,

Neil Zhenqiang Gong,

Lichao Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2025_CVPR,
  author    = {Yuan, Zenghui and Shi, Jiawen and Zhou, Pan and Gong, Neil Zhenqiang and Sun, Lichao},
  title     = {{BadToken}: Token-level Backdoor Attacks to Multi-modal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29927--29936},
}
VLMs-Guided Representation Distillation for Efficient Vision-Based Reinforcement Learning: Haoran Xu,

Peixi Peng,

Guang Tan,

Yiqian Chang,

Luntong Li,

Yonghong Tian; [pdf] [supp]
[bibtex]
% NOTE(review): the key Xu_2025_CVPR is also used by another entry in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Xu_2025_CVPR,
  author    = {Xu, Haoran and Peng, Peixi and Tan, Guang and Chang, Yiqian and Li, Luntong and Tian, Yonghong},
  title     = {{VLMs}-Guided Representation Distillation for Efficient Vision-Based Reinforcement Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29534--29544},
}
NeISF++: Neural Incident Stokes Field for Polarized Inverse Rendering of Conductors and Dielectrics: Chenhao Li,

Taishi Ono,

Takeshi Uemori,

Sho Nitta,

Hajime Mihara,

Alexander Gatto,

Hajime Nagahara,

Yusuke Moriuchi; [pdf] [supp]
[bibtex]
% NOTE(review): the key Li_2025_CVPR is also used by another entry in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Li_2025_CVPR,
  author    = {Li, Chenhao and Ono, Taishi and Uemori, Takeshi and Nitta, Sho and Mihara, Hajime and Gatto, Alexander and Nagahara, Hajime and Moriuchi, Yusuke},
  title     = {{NeISF++}: Neural Incident {Stokes} Field for Polarized Inverse Rendering of Conductors and Dielectrics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26493--26503},
}
Non-Natural Image Understanding with Advancing Frequency-based Vision Encoders: Wang Lin,

QingSong Wang,

Yueying Feng,

Shulei Wang,

Tao Jin,

Zhou Zhao,

Fei Wu,

Chang Yao,

Jingyuan Chen; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Wang and Wang, QingSong and Feng, Yueying and Wang, Shulei and Jin, Tao and Zhao, Zhou and Wu, Fei and Yao, Chang and Chen, Jingyuan},
  title     = {Non-Natural Image Understanding with Advancing Frequency-based Vision Encoders},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29756--29766},
}
Generative Multimodal Pretraining with Discrete Diffusion Timestep Tokens: Kaihang Pan,

Wang Lin,

Zhongqi Yue,

Tenglong Ao,

Liyu Jia,

Wei Zhao,

Juncheng Li,

Siliang Tang,

Hanwang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2025_CVPR,
  author    = {Pan, Kaihang and Lin, Wang and Yue, Zhongqi and Ao, Tenglong and Jia, Liyu and Zhao, Wei and Li, Juncheng and Tang, Siliang and Zhang, Hanwang},
  title     = {Generative Multimodal Pretraining with Discrete Diffusion Timestep Tokens},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26136--26146},
}
SplatFlow: Self-Supervised Dynamic Gaussian Splatting in Neural Motion Flow Field for Autonomous Driving: Su Sun,

Cheng Zhao,

Zhuoyang Sun,

Yingjie Victor Chen,

Mei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Su and Zhao, Cheng and Sun, Zhuoyang and Chen, Yingjie Victor and Chen, Mei},
  title     = {{SplatFlow}: Self-Supervised Dynamic {Gaussian} Splatting in Neural Motion Flow Field for Autonomous Driving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27487--27496},
}
AesthetiQ: Enhancing Graphic Layout Design via Aesthetic-Aware Preference Alignment of Multi-modal Large Language Models: Sohan Patnaik,

Rishabh Jain,

Balaji Krishnamurthy,

Mausoom Sarkar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Patnaik_2025_CVPR,
  author    = {Patnaik, Sohan and Jain, Rishabh and Krishnamurthy, Balaji and Sarkar, Mausoom},
  title     = {{AesthetiQ}: Enhancing Graphic Layout Design via Aesthetic-Aware Preference Alignment of Multi-modal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23701--23711},
}
FINECAPTION: Compositional Image Captioning Focusing on Wherever You Want at Any Granularity: Hang Hua,

Qing Liu,

Lingzhi Zhang,

Jing Shi,

Soo Ye Kim,

Zhifei Zhang,

Yilin Wang,

Jianming Zhang,

Zhe Lin,

Jiebo Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hua_2025_CVPR,
  author    = {Hua, Hang and Liu, Qing and Zhang, Lingzhi and Shi, Jing and Kim, Soo Ye and Zhang, Zhifei and Wang, Yilin and Zhang, Jianming and Lin, Zhe and Luo, Jiebo},
  title     = {{FINECAPTION}: Compositional Image Captioning Focusing on Wherever You Want at Any Granularity},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24763--24773},
}
Chebyshev Attention Depth Permutation Texture Network with Latent Texture Attribute Loss: Ravishankar Evani,

Deepu Rajan,

Shangbo Mao; [pdf] [supp]
[bibtex]
@InProceedings{Evani_2025_CVPR,
  author    = {Evani, Ravishankar and Rajan, Deepu and Mao, Shangbo},
  title     = {{Chebyshev} Attention Depth Permutation Texture Network with Latent Texture Attribute Loss},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23423--23432},
}
Decentralized Diffusion Models: David McAllister,

Matthew Tancik,

Jiaming Song,

Angjoo Kanazawa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{McAllister_2025_CVPR,
  author    = {McAllister, David and Tancik, Matthew and Song, Jiaming and Kanazawa, Angjoo},
  title     = {Decentralized Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23323--23333},
}
AnyEdit: Mastering Unified High-Quality Image Editing for Any Idea: Qifan Yu,

Wei Chow,

Zhongqi Yue,

Kaihang Pan,

Yang Wu,

Xiaoyang Wan,

Juncheng Li,

Siliang Tang,

Hanwang Zhang,

Yueting Zhuang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Qifan and Chow, Wei and Yue, Zhongqi and Pan, Kaihang and Wu, Yang and Wan, Xiaoyang and Li, Juncheng and Tang, Siliang and Zhang, Hanwang and Zhuang, Yueting},
  title     = {{AnyEdit}: Mastering Unified High-Quality Image Editing for Any Idea},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26125--26135},
}
DNF: Unconditional 4D Generation with Dictionary-based Neural Fields: Xinyi Zhang,

Naiqi Li,

Angela Dai; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Zhang_2025_CVPR is also used by other entries in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Xinyi and Li, Naiqi and Dai, Angela},
  title     = {{DNF}: Unconditional {4D} Generation with Dictionary-based Neural Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26047--26056},
}
ARM: Appearance Reconstruction Model for Relightable 3D Generation: Xiang Feng,

Chang Yu,

Zoubin Bi,

Yintong Shang,

Feng Gao,

Hongzhi Wu,

Kun Zhou,

Chenfanfu Jiang,

Yin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2025_CVPR,
  author    = {Feng, Xiang and Yu, Chang and Bi, Zoubin and Shang, Yintong and Gao, Feng and Wu, Hongzhi and Zhou, Kun and Jiang, Chenfanfu and Yang, Yin},
  title     = {{ARM}: Appearance Reconstruction Model for Relightable {3D} Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21425--21437},
}
Ground-V: Teaching VLMs to Ground Complex Instructions in Pixels: Yongshuo Zong,

Qin Zhang,

Dongsheng An,

Zhihua Li,

Xiang Xu,

Linghan Xu,

Zhuowen Tu,

Yifan Xing,

Onkar Dabeer; [pdf] [supp]
[bibtex]
@InProceedings{Zong_2025_CVPR,
  author    = {Zong, Yongshuo and Zhang, Qin and An, Dongsheng and Li, Zhihua and Xu, Xiang and Xu, Linghan and Tu, Zhuowen and Xing, Yifan and Dabeer, Onkar},
  title     = {{Ground-V}: Teaching {VLMs} to Ground Complex Instructions in Pixels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24635--24645},
}
TreeMeshGPT: Artistic Mesh Generation with Autoregressive Tree Sequencing: Stefan Lionar,

Jiabin Liang,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lionar_2025_CVPR,
  author    = {Lionar, Stefan and Liang, Jiabin and Lee, Gim Hee},
  title     = {{TreeMeshGPT}: Artistic Mesh Generation with Autoregressive Tree Sequencing},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26608--26617},
}
Generating 3D-Consistent Videos from Unposed Internet Photos: Gene Chou,

Kai Zhang,

Sai Bi,

Hao Tan,

Zexiang Xu,

Fujun Luan,

Bharath Hariharan,

Noah Snavely; [pdf] [arXiv]
[bibtex]
@InProceedings{Chou_2025_CVPR,
  author    = {Chou, Gene and Zhang, Kai and Bi, Sai and Tan, Hao and Xu, Zexiang and Luan, Fujun and Hariharan, Bharath and Snavely, Noah},
  title     = {Generating {3D}-Consistent Videos from Unposed Internet Photos},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27934--27945},
}
Parameter Efficient Mamba Tuning via Projector-targeted Diagonal-centric Linear Transformation: Seokil Ham,

Hee-Seon Kim,

Sangmin Woo,

Changick Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ham_2025_CVPR,
  author    = {Ham, Seokil and Kim, Hee-Seon and Woo, Sangmin and Kim, Changick},
  title     = {Parameter Efficient {Mamba} Tuning via Projector-targeted Diagonal-centric Linear Transformation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30106--30115},
}
ViUniT: Visual Unit Tests for More Robust Visual Programming: Artemis Panagopoulou,

Honglu Zhou,

Silvio Savarese,

Caiming Xiong,

Chris Callison-Burch,

Mark Yatskar,

Juan Carlos Niebles; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Panagopoulou_2025_CVPR,
  author    = {Panagopoulou, Artemis and Zhou, Honglu and Savarese, Silvio and Xiong, Caiming and Callison-Burch, Chris and Yatskar, Mark and Niebles, Juan Carlos},
  title     = {{ViUniT}: Visual Unit Tests for More Robust Visual Programming},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24646--24656},
}
DualTalk: Dual-Speaker Interaction for 3D Talking Head Conversations: Ziqiao Peng,

Yanbo Fan,

Haoyu Wu,

Xuan Wang,

Hongyan Liu,

Jun He,

Zhaoxin Fan; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Peng_2025_CVPR is also used by another entry in this file; BibTeX will flag a repeated entry, so disambiguate before citing.
@InProceedings{Peng_2025_CVPR,
  author    = {Peng, Ziqiao and Fan, Yanbo and Wu, Haoyu and Wang, Xuan and Liu, Hongyan and He, Jun and Fan, Zhaoxin},
  title     = {{DualTalk}: Dual-Speaker Interaction for {3D} Talking Head Conversations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21055--21064},
}
beta-FFT: Nonlinear Interpolation and Differentiated Training Strategies for Semi-Supervised Medical Image Segmentation: Ming Hu,

Jianfu Yin,

Zhuangzhuang Ma,

Jianheng Ma,

Feiyu Zhu,

Bingbing Wu,

Ya Wen,

Meng Wu,

Cong Hu,

Bingliang Hu,

Quan Wang; [pdf]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Ming and Yin, Jianfu and Ma, Zhuangzhuang and Ma, Jianheng and Zhu, Feiyu and Wu, Bingbing and Wen, Ya and Wu, Meng and Hu, Cong and Hu, Bingliang and Wang, Quan},
  title     = {{beta-FFT}: Nonlinear Interpolation and Differentiated Training Strategies for Semi-Supervised Medical Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30839--30849},
}
Dynamic Group Normalization: Spatio-Temporal Adaptation to Evolving Data Statistics: Yair Smadar,

Assaf Hoogi; [pdf] [supp]
[bibtex]
@InProceedings{Smadar_2025_CVPR,
  author    = {Smadar, Yair and Hoogi, Assaf},
  title     = {Dynamic Group Normalization: Spatio-Temporal Adaptation to Evolving Data Statistics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30167--30177},
}
SynerGen-VL: Towards Synergistic Image Understanding and Generation with Vision Experts and Token Folding: Hao Li,

Changyao Tian,

Jie Shao,

Xizhou Zhu,

Zhaokai Wang,

Jinguo Zhu,

Wenhan Dou,

Xiaogang Wang,

Hongsheng Li,

Lewei Lu,

Jifeng Dai; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Hao and Tian, Changyao and Shao, Jie and Zhu, Xizhou and Wang, Zhaokai and Zhu, Jinguo and Dou, Wenhan and Wang, Xiaogang and Li, Hongsheng and Lu, Lewei and Dai, Jifeng},
  title     = {{SynerGen-VL}: Towards Synergistic Image Understanding and Generation with Vision Experts and Token Folding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29767--29779},
}
Uncertain Multimodal Intention and Emotion Understanding in the Wild: Qu Yang,

Qinghongya Shi,

Tongxin Wang,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Qu and Shi, Qinghongya and Wang, Tongxin and Ye, Mang},
  title     = {Uncertain Multimodal Intention and Emotion Understanding in the Wild},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24700--24709},
}
VidTwin: Video VAE with Decoupled Structure and Dynamics: Yuchi Wang,

Junliang Guo,

Xinyi Xie,

Tianyu He,

Xu Sun,

Jiang Bian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuchi and Guo, Junliang and Xie, Xinyi and He, Tianyu and Sun, Xu and Bian, Jiang},
  title     = {{VidTwin}: Video {VAE} with Decoupled Structure and Dynamics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22922--22932},
}
CL-LoRA: Continual Low-Rank Adaptation for Rehearsal-Free Class-Incremental Learning: Jiangpeng He,

Zhihao Duan,

Fengqing Zhu; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Jiangpeng and Duan, Zhihao and Zhu, Fengqing},
  title     = {{CL-LoRA}: Continual Low-Rank Adaptation for Rehearsal-Free Class-Incremental Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30534--30544},
}
Design2GarmentCode: Turning Design Concepts to Tangible Garments Through Program Synthesis: Feng Zhou,

Ruiyang Liu,

Chen Liu,

Gaofeng He,

Yong-Lu Li,

Xiaogang Jin,

Huamin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Feng and Liu, Ruiyang and Liu, Chen and He, Gaofeng and Li, Yong-Lu and Jin, Xiaogang and Wang, Huamin},
  title     = {{Design2GarmentCode}: Turning Design Concepts to Tangible Garments Through Program Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23712--23722},
}
Efficient Dynamic Scene Editing via 4D Gaussian-based Static-Dynamic Separation: Joohyun Kwon,

Hanbyel Cho,

Junmo Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kwon_2025_CVPR,
  author    = {Kwon, Joohyun and Cho, Hanbyel and Kim, Junmo},
  title     = {Efficient Dynamic Scene Editing via {4D} {Gaussian}-based Static-Dynamic Separation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26855--26865},
}
Unlearning through Knowledge Overwriting: Reversible Federated Unlearning via Selective Sparse Adapter: Zhengyi Zhong,

Weidong Bao,

Ji Wang,

Shuai Zhang,

Jingxuan Zhou,

Lingjuan Lyu,

Wei Yang Bryan Lim; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhong_2025_CVPR,
  author    = {Zhong, Zhengyi and Bao, Weidong and Wang, Ji and Zhang, Shuai and Zhou, Jingxuan and Lyu, Lingjuan and Lim, Wei Yang Bryan},
  title     = {Unlearning through Knowledge Overwriting: Reversible Federated Unlearning via Selective Sparse Adapter},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30661--30670},
}
SocialMOIF: Multi-Order Intention Fusion for Pedestrian Trajectory Prediction: Kai Chen,

Xiaodong Zhao,

Yujie Huang,

Guoyu Fang,

Xiao Song,

Ruiping Wang,

Ziyuan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Kai and Zhao, Xiaodong and Huang, Yujie and Fang, Guoyu and Song, Xiao and Wang, Ruiping and Wang, Ziyuan},
  title     = {{SocialMOIF}: Multi-Order Intention Fusion for Pedestrian Trajectory Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22465--22475},
}
HistoFS: Non-IID Histopathologic Whole Slide Image Classification via Federated Style Transfer with RoI-Preserving: Farchan Hakim Raswa,

Chun-Shien Lu,

Jia-Ching Wang; [pdf] [supp]
[bibtex]
@InProceedings{Raswa_2025_CVPR,
  author    = {Raswa, Farchan Hakim and Lu, Chun-Shien and Wang, Jia-Ching},
  title     = {{HistoFS}: {Non-IID} Histopathologic Whole Slide Image Classification via Federated Style Transfer with {RoI}-Preserving},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30251--30260},
}
SGSST: Scaling Gaussian Splatting Style Transfer: Bruno Galerne,

Jianling Wang,

Lara Raad,

Jean-Michel Morel; [pdf] [supp]
[bibtex]
@InProceedings{Galerne_2025_CVPR,
  author    = {Galerne, Bruno and Wang, Jianling and Raad, Lara and Morel, Jean-Michel},
  title     = {{SGSST}: Scaling {Gaussian} Splatting Style Transfer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26535--26544},
}
Learning Bijective Surface Parameterization for Inferring Signed Distance Functions from Sparse Point Clouds with Grid Deformation: Takeshi Noda,

Chao Chen,

Junsheng Zhou,

Weiqi Zhang,

Yu-Shen Liu,

Zhizhong Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Noda_2025_CVPR,
  author    = {Noda, Takeshi and Chen, Chao and Zhou, Junsheng and Zhang, Weiqi and Liu, Yu-Shen and Han, Zhizhong},
  title     = {Learning Bijective Surface Parameterization for Inferring Signed Distance Functions from Sparse Point Clouds with Grid Deformation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22139--22149},
}
Balancing Two Classifiers via A Simplex ETF Structure for Model Calibration: Jiani Ni,

He Zhao,

Jintong Gao,

Dandan Guo,

Hongyuan Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2025_CVPR,
  author    = {Ni, Jiani and Zhao, He and Gao, Jintong and Guo, Dandan and Zha, Hongyuan},
  title     = {Balancing Two Classifiers via A Simplex {ETF} Structure for Model Calibration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30712--30721},
}
DAMM-Diffusion: Learning Divergence-Aware Multi-Modal Diffusion Model for Nanoparticles Distribution Prediction: Junjie Zhou,

Shouju Wang,

Yuxia Tang,

Qi Zhu,

Daoqiang Zhang,

Wei Shao; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Junjie and Wang, Shouju and Tang, Yuxia and Zhu, Qi and Zhang, Daoqiang and Shao, Wei},
  title     = {{DAMM-Diffusion}: Learning Divergence-Aware Multi-Modal Diffusion Model for Nanoparticles Distribution Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30886--30895},
}
U-Know-DiffPAN: An Uncertainty-aware Knowledge Distillation Diffusion Framework with Details Enhancement for PAN-Sharpening: Sungpyo Kim,

Jeonghyeok Do,

Jaehyup Lee,

Munchurl Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Sungpyo and Do, Jeonghyeok and Lee, Jaehyup and Kim, Munchurl},
  title     = {{U-Know-DiffPAN}: An Uncertainty-aware Knowledge Distillation Diffusion Framework with Details Enhancement for {PAN}-Sharpening},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23069--23079},
}
RelationField: Relate Anything in Radiance Fields: Sebastian Koch,

Johanna Wald,

Mirco Colosi,

Narunas Vaskevicius,

Pedro Hermosilla,

Federico Tombari,

Timo Ropinski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koch_2025_CVPR,
  author    = {Koch, Sebastian and Wald, Johanna and Colosi, Mirco and Vaskevicius, Narunas and Hermosilla, Pedro and Tombari, Federico and Ropinski, Timo},
  title     = {{RelationField}: Relate Anything in Radiance Fields},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21706--21716},
}
Let Humanoids Hike! Integrative Skill Development on Complex Trails: Kwan-Yee Lin,

Stella X. Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Kwan-Yee and Yu, Stella X.},
  title     = {Let Humanoids Hike! {Integrative} Skill Development on Complex Trails},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22498--22507},
}
BF-STVSR: B-Splines and Fourier---Best Friends for High Fidelity Spatial-Temporal Video Super-Resolution: Eunjin Kim,

Hyeonjin Kim,

Kyong Hwan Jin,

Jaejun Yoo; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2025_CVPR,
  author    = {Kim, Eunjin and Kim, Hyeonjin and Jin, Kyong Hwan and Yoo, Jaejun},
  title     = {{BF-STVSR}: {B-Splines} and {Fourier}---Best Friends for High Fidelity Spatial-Temporal Video Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28009--28018},
}
DIO: Decomposable Implicit 4D Occupancy-Flow World Model: Christopher Diehl,

Quinlan Sykora,

Ben Agro,

Thomas Gilles,

Sergio Casas,

Raquel Urtasun; [pdf] [supp]
[bibtex]
@InProceedings{Diehl_2025_CVPR,
  author    = {Diehl, Christopher and Sykora, Quinlan and Agro, Ben and Gilles, Thomas and Casas, Sergio and Urtasun, Raquel},
  title     = {{DIO}: Decomposable Implicit {4D} Occupancy-Flow World Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27456--27466},
}
SLADE: Shielding against Dual Exploits in Large Vision-Language Models: Md Zarif Hossain,

Ahmed Imteaj; [pdf] [supp]
[bibtex]
@InProceedings{Hossain_2025_CVPR,
  author    = {Hossain, Md Zarif and Imteaj, Ahmed},
  title     = {{SLADE}: Shielding against Dual Exploits in Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24244--24254},
}
Ego4o: Egocentric Human Motion Capture and Understanding from Multi-Modal Input: Jian Wang,

Rishabh Dabral,

Diogo Luvizon,

Zhe Cao,

Lingjie Liu,

Thabo Beeler,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Jian and Dabral, Rishabh and Luvizon, Diogo and Cao, Zhe and Liu, Lingjie and Beeler, Thabo and Theobalt, Christian},
  title     = {{Ego4o}: Egocentric Human Motion Capture and Understanding from Multi-Modal Input},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22668--22679},
}
FreePCA: Integrating Consistency Information across Long-short Frames in Training-free Long Video Generation via Principal Component Analysis: Jiangtong Tan,

Hu Yu,

Jie Huang,

Jie Xiao,

Feng Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2025_CVPR,
  author    = {Tan, Jiangtong and Yu, Hu and Huang, Jie and Xiao, Jie and Zhao, Feng},
  title     = {{FreePCA}: Integrating Consistency Information across Long-short Frames in Training-free Long Video Generation via Principal Component Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27979--27988},
}
Mind the Time: Temporally-Controlled Multi-Event Video Generation: Ziyi Wu,

Aliaksandr Siarohin,

Willi Menapace,

Ivan Skorokhodov,

Yuwei Fang,

Varnith Chordia,

Igor Gilitschenski,

Sergey Tulyakov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Ziyi and Siarohin, Aliaksandr and Menapace, Willi and Skorokhodov, Ivan and Fang, Yuwei and Chordia, Varnith and Gilitschenski, Igor and Tulyakov, Sergey},
  title     = {Mind the Time: Temporally-Controlled Multi-Event Video Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23989--24000},
}
Audio-Visual Semantic Graph Network for Audio-Visual Event Localization: Liang Liu,

Shuaiyong Li,

Yongqiang Zhu; [pdf]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Liang and Li, Shuaiyong and Zhu, Yongqiang},
  title     = {Audio-Visual Semantic Graph Network for Audio-Visual Event Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23957--23966},
}
Video Motion Transfer with Diffusion Transformers: Alexander Pondaven,

Aliaksandr Siarohin,

Sergey Tulyakov,

Philip Torr,

Fabio Pizzati; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pondaven_2025_CVPR,
  author    = {Pondaven, Alexander and Siarohin, Aliaksandr and Tulyakov, Sergey and Torr, Philip and Pizzati, Fabio},
  title     = {Video Motion Transfer with Diffusion Transformers},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22911--22921},
}
Unified Reconstruction of Static and Dynamic Scenes from Events: Qiyao Gao,

Peiqi Duan,

Hanyue Lou,

Minggui Teng,

Ziqi Cai,

Xu Chen,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2025_CVPR,
  author    = {Gao, Qiyao and Duan, Peiqi and Lou, Hanyue and Teng, Minggui and Cai, Ziqi and Chen, Xu and Shi, Boxin},
  title     = {Unified Reconstruction of Static and Dynamic Scenes from Events},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27914--27923},
}
Automatic Spectral Calibration of Hyperspectral Images: Method, Dataset and Benchmark: Zhuoran Du,

Shaodi You,

Cheng Cheng,

Shikui Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2025_CVPR,
  author    = {Du, Zhuoran and You, Shaodi and Cheng, Cheng and Wei, Shikui},
  title     = {Automatic Spectral Calibration of Hyperspectral Images: Method, Dataset and Benchmark},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28081--28090},
}
Point-to-Region Loss for Semi-Supervised Point-Based Crowd Counting: Wei Lin,

Chenyang Zhao,

Antoni B. Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Wei and Zhao, Chenyang and Chan, Antoni B.},
  title     = {Point-to-Region Loss for Semi-Supervised Point-Based Crowd Counting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29363--29373},
}
Move-in-2D: 2D-Conditioned Human Motion Generation: Hsin-Ping Huang,

Yang Zhou,

Jui-Hsien Wang,

Difan Liu,

Feng Liu,

Ming-Hsuan Yang,

Zhan Xu; [pdf]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Hsin-Ping and Zhou, Yang and Wang, Jui-Hsien and Liu, Difan and Liu, Feng and Yang, Ming-Hsuan and Xu, Zhan},
  title     = {{Move-in-2D}: {2D}-Conditioned Human Motion Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22766--22775},
}
MATCHA: Towards Matching Anything: Fei Xue,

Sven Elflein,

Laura Leal-Taixé,

Qunjie Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Xue_2025_CVPR,
  author    = {Xue, Fei and Elflein, Sven and Leal-Taix{\'e}, Laura and Zhou, Qunjie},
  title     = {{MATCHA}: Towards Matching Anything},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27081--27091},
}
CTRL-D: Controllable Dynamic 3D Scene Editing with Personalized 2D Diffusion: Kai He,

Chin-Hsuan Wu,

Igor Gilitschenski; [pdf] [supp]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Kai and Wu, Chin-Hsuan and Gilitschenski, Igor},
  title     = {{CTRL-D}: Controllable Dynamic {3D} Scene Editing with Personalized {2D} Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26630--26640},
}
Separation of Powers: On Segregating Knowledge from Observation in LLM-enabled Knowledge-based Visual Question Answering: Zhen Yang,

Zhuo Tao,

Qi Chen,

Liang Li,

Yuankai Qi,

Anton van den Hengel,

Qingming Huang; [pdf]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Zhen and Tao, Zhuo and Chen, Qi and Li, Liang and Qi, Yuankai and van den Hengel, Anton and Huang, Qingming},
  title     = {Separation of Powers: On Segregating Knowledge from Observation in {LLM}-enabled Knowledge-based Visual Question Answering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24753--24762},
}
SF2T: Self-supervised Fragment Finetuning of Video-LLMs for Fine-Grained Understanding: Yangliu Hu,

Zikai Song,

Na Feng,

Yawei Luo,

Junqing Yu,

Yi-Ping Phoebe Chen,

Wei Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Yangliu and Song, Zikai and Feng, Na and Luo, Yawei and Yu, Junqing and Chen, Yi-Ping Phoebe and Yang, Wei},
  title     = {{SF2T}: Self-supervised Fragment Finetuning of {Video-LLMs} for Fine-Grained Understanding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29108--29117},
}
Fitted Neural Lossless Image Compression: Zhe Zhang,

Zhenzhong Chen,

Shan Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zhe and Chen, Zhenzhong and Liu, Shan},
  title     = {Fitted Neural Lossless Image Compression},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23249--23258},
}
JarvisIR: Elevating Autonomous Driving Perception with Intelligent Image Restoration: Yunlong Lin,

Zixu Lin,

Haoyu Chen,

Panwang Pan,

Chenxin Li,

Sixiang Chen,

Kairun Wen,

Yeying Jin,

Wenbo Li,

Xinghao Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Yunlong and Lin, Zixu and Chen, Haoyu and Pan, Panwang and Li, Chenxin and Chen, Sixiang and Wen, Kairun and Jin, Yeying and Li, Wenbo and Ding, Xinghao},
  title     = {{JarvisIR}: Elevating Autonomous Driving Perception with Intelligent Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22369--22380},
}
F-LMM: Grounding Frozen Large Multimodal Models: Size Wu,

Sheng Jin,

Wenwei Zhang,

Lumin Xu,

Wentao Liu,

Wei Li,

Chen Change Loy; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Size and Jin, Sheng and Zhang, Wenwei and Xu, Lumin and Liu, Wentao and Li, Wei and Loy, Chen Change},
  title     = {{F-LMM}: Grounding Frozen Large Multimodal Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24710--24721},
}
EntityErasure: Erasing Entity Cleanly via Amodal Entity Segmentation and Completion: Yixing Zhu,

Qing Zhang,

Yitong Wang,

Yongwei Nie,

Wei-Shi Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Yixing and Zhang, Qing and Wang, Yitong and Nie, Yongwei and Zheng, Wei-Shi},
  title     = {{EntityErasure}: Erasing Entity Cleanly via Amodal Entity Segmentation and Completion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28274--28283},
}
Joint Out-of-Distribution Filtering and Data Discovery Active Learning: Sebastian Schmidt,

Leonard Schenk,

Leo Schwinn,

Stephan Günnemann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Schmidt_2025_CVPR,
  author    = {Schmidt, Sebastian and Schenk, Leonard and Schwinn, Leo and G{\"u}nnemann, Stephan},
  title     = {Joint Out-of-Distribution Filtering and Data Discovery Active Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25677--25687},
}
Finding Local Diffusion Schrödinger Bridge using Kolmogorov-Arnold Network: Xingyu Qiu,

Mengying Yang,

Xinghua Ma,

Fanding Li,

Dong Liang,

Gongning Luo,

Wei Wang,

Kuanquan Wang,

Shuo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2025_CVPR,
  author    = {Qiu, Xingyu and Yang, Mengying and Ma, Xinghua and Li, Fanding and Liang, Dong and Luo, Gongning and Wang, Wei and Wang, Kuanquan and Li, Shuo},
  title     = {Finding Local Diffusion {Schr{\"o}dinger} Bridge using {Kolmogorov-Arnold} Network},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23227--23236},
}
CorrBEV: Multi-View 3D Object Detection by Correlation Learning with Multi-modal Prototypes: Ziteng Xue,

Mingzhe Guo,

Heng Fan,

Shihui Zhang,

Zhipeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Xue_2025_CVPR,
  author    = {Xue, Ziteng and Guo, Mingzhe and Fan, Heng and Zhang, Shihui and Zhang, Zhipeng},
  title     = {{CorrBEV}: Multi-View {3D} Object Detection by Correlation Learning with Multi-modal Prototypes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27413--27423},
}
Completion as Enhancement: A Degradation-Aware Selective Image Guided Network for Depth Completion: Zhiqiang Yan,

Zhengxue Wang,

Kun Wang,

Jun Li,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Zhiqiang and Wang, Zhengxue and Wang, Kun and Li, Jun and Yang, Jian},
  title     = {Completion as Enhancement: A Degradation-Aware Selective Image Guided Network for Depth Completion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26943--26953},
}
Around the World in 80 Timesteps: A Generative Approach to Global Visual Geolocation: Nicolas Dufour,

Vicky Kalogeiton,

David Picard,

Loic Landrieu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dufour_2025_CVPR,
  author    = {Dufour, Nicolas and Kalogeiton, Vicky and Picard, David and Landrieu, Loic},
  title     = {Around the World in 80 Timesteps: A Generative Approach to Global Visual Geolocation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23016--23026},
}
Real-time High-fidelity Gaussian Human Avatars with Position-based Interpolation of Spatially Distributed MLPs: Youyi Zhan,

Tianjia Shao,

Yin Yang,

Kun Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2025_CVPR,
  author    = {Zhan, Youyi and Shao, Tianjia and Yang, Yin and Zhou, Kun},
  title     = {Real-time High-fidelity {Gaussian} Human Avatars with Position-based Interpolation of Spatially Distributed {MLPs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26297--26307},
}
RoboSense: Large-scale Dataset and Benchmark for Egocentric Robot Perception and Navigation in Crowded and Unstructured Environments: Haisheng Su,

Feixiang Song,

Cong Ma,

Wei Wu,

Junchi Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Su_2025_CVPR,
  author    = {Su, Haisheng and Song, Feixiang and Ma, Cong and Wu, Wei and Yan, Junchi},
  title     = {{RoboSense}: Large-scale Dataset and Benchmark for Egocentric Robot Perception and Navigation in Crowded and Unstructured Environments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27446--27455},
}
DEFOM-Stereo: Depth Foundation Model Based Stereo Matching: Hualie Jiang,

Zhiqiang Lou,

Laiyan Ding,

Rui Xu,

Minglang Tan,

Wenjie Jiang,

Rui Huang; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Hualie and Lou, Zhiqiang and Ding, Laiyan and Xu, Rui and Tan, Minglang and Jiang, Wenjie and Huang, Rui},
  title     = {{DEFOM-Stereo}: Depth Foundation Model Based Stereo Matching},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21857--21867},
}
DiskVPS: Vanishing Point Detector via Hough Transform in a Disk Region: Jianping Wu; [pdf]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Jianping},
  title     = {{DiskVPS}: Vanishing Point Detector via {Hough} Transform in a Disk Region},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27049--27058},
}
Seeing Far and Clearly: Mitigating Hallucinations in MLLMs with Attention Causal Decoding: Feilong Tang,

Chengzhi Liu,

Zhongxing Xu,

Ming Hu,

Zile Huang,

Haochen Xue,

Ziyang Chen,

Zelin Peng,

Zhiwei Yang,

Sijin Zhou,

Wenxue Li,

Yulong Li,

Wenxuan Song,

Shiyan Su,

Wei Feng,

Jionglong Su,

Mingquan Lin,

Yifan Peng,

Xuelian Cheng,

Imran Razzak,

Zongyuan Ge; [pdf] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Feilong and Liu, Chengzhi and Xu, Zhongxing and Hu, Ming and Huang, Zile and Xue, Haochen and Chen, Ziyang and Peng, Zelin and Yang, Zhiwei and Zhou, Sijin and Li, Wenxue and Li, Yulong and Song, Wenxuan and Su, Shiyan and Feng, Wei and Su, Jionglong and Lin, Mingquan and Peng, Yifan and Cheng, Xuelian and Razzak, Imran and Ge, Zongyuan},
  title     = {Seeing Far and Clearly: Mitigating Hallucinations in {MLLMs} with Attention Causal Decoding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26147--26159},
}
Towards Autonomous Micromobility through Scalable Urban Simulation: Wayne Wu,

Honglin He,

Chaoyuan Zhang,

Jack He,

Seth Z. Zhao,

Ran Gong,

Quanyi Li,

Bolei Zhou; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Wayne and He, Honglin and Zhang, Chaoyuan and He, Jack and Zhao, Seth Z. and Gong, Ran and Li, Quanyi and Zhou, Bolei},
  title     = {Towards Autonomous Micromobility through Scalable Urban Simulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27553--27563},
}
Language-Assisted Debiasing and Smoothing for Foundation Model-Based Semi-Supervised Learning: Na Zheng,

Xuemeng Song,

Xue Dong,

Aashish Nikhil Ghosh,

Liqiang Nie,

Roger Zimmermann; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Na and Song, Xuemeng and Dong, Xue and Ghosh, Aashish Nikhil and Nie, Liqiang and Zimmermann, Roger},
  title     = {Language-Assisted Debiasing and Smoothing for Foundation Model-Based Semi-Supervised Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25708--25717},
}
EdgeMovingNet: Edge-preserving Point Cloud Reconstruction via Joint Geometry Features: Xinran Yang,

Donghao Ji,

Yuanqi Li,

Junyuan Xie,

Jie Guo,

Yanwen Guo; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Xinran and Ji, Donghao and Li, Yuanqi and Xie, Junyuan and Guo, Jie and Guo, Yanwen},
  title     = {{EdgeMovingNet}: Edge-preserving Point Cloud Reconstruction via Joint Geometry Features},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22150--22160},
}
Harnessing Frozen Unimodal Encoders for Flexible Multimodal Alignment: Mayug Maniparambil,

Raiymbek Akshulakov,

Yasser Abdelaziz Dahou Djilali,

Sanath Narayan,

Ankit Singh,

Noel E. O'Connor; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Maniparambil_2025_CVPR,
  author    = {Maniparambil, Mayug and Akshulakov, Raiymbek and Djilali, Yasser Abdelaziz Dahou and Narayan, Sanath and Singh, Ankit and O'Connor, Noel E.},
  title     = {Harnessing Frozen Unimodal Encoders for Flexible Multimodal Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29847--29857},
}
Feature Information Driven Position Gaussian Distribution Estimation for Tiny Object Detection: Jinghao Bian,

Mingtao Feng,

Weisheng Dong,

Fangfang Wu,

Jianqiao Luo,

Yaonan Wang,

Guangming Shi; [pdf] [supp]
[bibtex]
@InProceedings{Bian_2025_CVPR,
  author    = {Bian, Jinghao and Feng, Mingtao and Dong, Weisheng and Wu, Fangfang and Luo, Jianqiao and Wang, Yaonan and Shi, Guangming},
  title     = {Feature Information Driven Position {Gaussian} Distribution Estimation for Tiny Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30376--30386},
}
Enhancing Diversity for Data-free Quantization: Kai Zhao,

Zhihao Zhuang,

Miao Zhang,

Chenjuan Guo,

Yang Shu,

Bin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Kai and Zhuang, Zhihao and Zhang, Miao and Guo, Chenjuan and Shu, Yang and Yang, Bin},
  title     = {Enhancing Diversity for Data-free Quantization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {20969--20978},
}
From Alexnet to Transformers: Measuring the Non-linearity of Deep Neural Networks with Affine Optimal Transport: Quentin Bouniot,

Ievgen Redko,

Anton Mallasto,

Charlotte Laclau,

Oliver Struckmeier,

Karol Arndt,

Markus Heinonen,

Ville Kyrki,

Samuel Kaski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bouniot_2025_CVPR,
  author    = {Bouniot, Quentin and Redko, Ievgen and Mallasto, Anton and Laclau, Charlotte and Struckmeier, Oliver and Arndt, Karol and Heinonen, Markus and Kyrki, Ville and Kaski, Samuel},
  title     = {From {Alexnet} to Transformers: Measuring the Non-linearity of Deep Neural Networks with Affine Optimal Transport},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25250--25260},
}
Prompt2Perturb (P2P): Text-Guided Diffusion-Based Adversarial Attack on Breast Ultrasound Images: Yasamin Medghalchi,

Moein Heidari,

Clayton Allard,

Leonid Sigal,

Ilker Hacihaliloglu; [pdf] [arXiv]
[bibtex]
@InProceedings{Medghalchi_2025_CVPR,
  author    = {Medghalchi, Yasamin and Heidari, Moein and Allard, Clayton and Sigal, Leonid and Hacihaliloglu, Ilker},
  title     = {{Prompt2Perturb} ({P2P}): Text-Guided Diffusion-Based Adversarial Attack on Breast Ultrasound Images},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28564--28574},
}
COAP: Memory-Efficient Training with Correlation-Aware Gradient Projection: Jinqi Xiao,

Shen Sang,

Tiancheng Zhi,

Jing Liu,

Qing Yan,

Linjie Luo,

Bo Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2025_CVPR,
  author    = {Xiao, Jinqi and Sang, Shen and Zhi, Tiancheng and Liu, Jing and Yan, Qing and Luo, Linjie and Yuan, Bo},
  title     = {{COAP}: Memory-Efficient Training with Correlation-Aware Gradient Projection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30116--30126},
}
Gyro-based Neural Single Image Deblurring: Heemin Yang,

Jaesung Rim,

Seungyong Lee,

Seung-Hwan Baek,

Sunghyun Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Heemin and Rim, Jaesung and Lee, Seungyong and Baek, Seung-Hwan and Cho, Sunghyun},
  title     = {Gyro-based Neural Single Image Deblurring},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23111--23120},
}
Improved Monocular Depth Prediction Using Distance Transform Over Pre-semantic Contours with Self-supervised Neural Networks: Marwane Hariat,

Antoine Manzanera,

David Filliat; [pdf] [supp]
[bibtex]
@InProceedings{Hariat_2025_CVPR,
  author    = {Hariat, Marwane and Manzanera, Antoine and Filliat, David},
  title     = {Improved Monocular Depth Prediction Using Distance Transform Over Pre-semantic Contours with Self-supervised Neural Networks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21868--21879},
}
Is this Generated Person Existed in Real-world? Fine-grained Detecting and Calibrating Abnormal Human-body: Zeqing Wang,

Qingyang Ma,

Wentao Wan,

Haojie Li,

Keze Wang,

Yonghong Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zeqing and Ma, Qingyang and Wan, Wentao and Li, Haojie and Wang, Keze and Tian, Yonghong},
  title     = {Is this Generated Person Existed in Real-world? Fine-grained Detecting and Calibrating Abnormal Human-body},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21226--21237},
}
Automated Generation of Challenging Multiple-Choice Questions for Vision Language Model Evaluation: Yuhui Zhang,

Yuchang Su,

Yiming Liu,

Xiaohan Wang,

James Burgess,

Elaine Sui,

Chenyu Wang,

Josiah Aklilu,

Alejandro Lozano,

Anjiang Wei,

Ludwig Schmidt,

Serena Yeung-Levy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yuhui and Su, Yuchang and Liu, Yiming and Wang, Xiaohan and Burgess, James and Sui, Elaine and Wang, Chenyu and Aklilu, Josiah and Lozano, Alejandro and Wei, Anjiang and Schmidt, Ludwig and Yeung-Levy, Serena},
  title     = {Automated Generation of Challenging Multiple-Choice Questions for Vision Language Model Evaluation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29580--29590},
}
ROLL: Robust Noisy Pseudo-label Learning for Multi-View Clustering with Noisy Correspondence: Yuan Sun,

Yongxiang Li,

Zhenwen Ren,

Guiduo Duan,

Dezhong Peng,

Peng Hu; [pdf]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Yuan and Li, Yongxiang and Ren, Zhenwen and Duan, Guiduo and Peng, Dezhong and Hu, Peng},
  title     = {{ROLL}: Robust Noisy Pseudo-label Learning for Multi-View Clustering with Noisy Correspondence},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30732--30741},
}
Towards In-the-wild 3D Plane Reconstruction from a Single Image: Jiachen Liu,

Rui Yu,

Sili Chen,

Sharon X. Huang,

Hengkai Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jiachen and Yu, Rui and Chen, Sili and Huang, Sharon X. and Guo, Hengkai},
  title     = {Towards In-the-wild {3D} Plane Reconstruction from a Single Image},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27027--27037},
}
PQPP: A Joint Benchmark for Text-to-Image Prompt and Query Performance Prediction: Eduard Poesina,

Adriana Valentina Costache,

Adrian-Gabriel Chifu,

Josiane Mothe,

Radu Tudor Ionescu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Poesina_2025_CVPR,
  author    = {Poesina, Eduard and Costache, Adriana Valentina and Chifu, Adrian-Gabriel and Mothe, Josiane and Ionescu, Radu Tudor},
  title     = {{PQPP}: A Joint Benchmark for Text-to-Image Prompt and Query Performance Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28651--28661},
}
CheXwhatsApp: A Dataset for Exploring Challenges in the Diagnosis of Chest X-rays through Mobile Devices: Mariamma Antony,

Rajiv Porana,

Sahil M Lathiya,

Siva Teja Kakileti,

Chiranjib Bhattacharyya; [pdf] [supp]
[bibtex]
@InProceedings{Antony_2025_CVPR,
  author    = {Antony, Mariamma and Porana, Rajiv and Lathiya, Sahil M and Kakileti, Siva Teja and Bhattacharyya, Chiranjib},
  title     = {{CheXwhatsApp}: A Dataset for Exploring Challenges in the Diagnosis of Chest {X}-rays through Mobile Devices},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25887--25896},
}
Degradation-Aware Feature Perturbation for All-in-One Image Restoration: Xiangpeng Tian,

Xiangyu Liao,

Xiao Liu,

Meng Li,

Chao Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2025_CVPR,
  author    = {Tian, Xiangpeng and Liao, Xiangyu and Liu, Xiao and Li, Meng and Ren, Chao},
  title     = {Degradation-Aware Feature Perturbation for All-in-One Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28165--28175},
}
GenDeg: Diffusion-based Degradation Synthesis for Generalizable All-In-One Image Restoration: Sudarshan Rajagopalan,

Nithin Gopalakrishnan Nair,

Jay N. Paranjape,

Vishal M. Patel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rajagopalan_2025_CVPR,
  author    = {Rajagopalan, Sudarshan and Nair, Nithin Gopalakrishnan and Paranjape, Jay N. and Patel, Vishal M.},
  title     = {{GenDeg}: Diffusion-based Degradation Synthesis for Generalizable All-In-One Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28144--28154},
}
The Power of Context: How Multimodality Improves Image Super-Resolution: Kangfu Mei,

Hossein Talebi,

Mojtaba Ardakani,

Vishal M. Patel,

Peyman Milanfar,

Mauricio Delbracio; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mei_2025_CVPR,
  author    = {Mei, Kangfu and Talebi, Hossein and Ardakani, Mojtaba and Patel, Vishal M. and Milanfar, Peyman and Delbracio, Mauricio},
  title     = {The Power of Context: How Multimodality Improves Image Super-Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23141--23152},
}
Detect Any Mirrors: Boosting Learning Reliability on Large-Scale Unlabeled Data with an Iterative Data Engine: Zhaohu Xing,

Lihao Liu,

Yijun Yang,

Hongqiu Wang,

Tian Ye,

Sixiang Chen,

Wenxue Li,

Guang Liu,

Lei Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Xing_2025_CVPR,
  author    = {Xing, Zhaohu and Liu, Lihao and Yang, Yijun and Wang, Hongqiu and Ye, Tian and Chen, Sixiang and Li, Wenxue and Liu, Guang and Zhu, Lei},
  title     = {Detect Any Mirrors: Boosting Learning Reliability on Large-Scale Unlabeled Data with an Iterative Data Engine},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25476--25486},
}
4D LangSplat: 4D Language Gaussian Splatting via Multimodal Large Language Models: Wanhua Li,

Renping Zhou,

Jiawei Zhou,

Yingwei Song,

Johannes Herter,

Minghan Qin,

Gao Huang,

Hanspeter Pfister; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Wanhua and Zhou, Renping and Zhou, Jiawei and Song, Yingwei and Herter, Johannes and Qin, Minghan and Huang, Gao and Pfister, Hanspeter},
  title     = {{4D} {LangSplat}: {4D} Language {Gaussian} Splatting via Multimodal Large Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22001--22011},
}
MotionMap: Representing Multimodality in Human Pose Forecasting: Reyhaneh Hosseininejad,

Megh Shukla,

Saeed Saadatnejad,

Mathieu Salzmann,

Alexandre Alahi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hosseininejad_2025_CVPR,
  author    = {Hosseininejad, Reyhaneh and Shukla, Megh and Saadatnejad, Saeed and Salzmann, Mathieu and Alahi, Alexandre},
  title     = {{MotionMap}: Representing Multimodality in Human Pose Forecasting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22680--22689},
}
Factored-NeuS: Reconstructing Surfaces, Illumination, and Materials of Possibly Glossy Objects: Yue Fan,

Ningjing Fan,

Ivan Skorokhodov,

Oleg Voynov,

Savva Ignatyev,

Evgeny Burnaev,

Peter Wonka,

Yiqun Wang; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Yue and Fan, Ningjing and Skorokhodov, Ivan and Voynov, Oleg and Ignatyev, Savva and Burnaev, Evgeny and Wonka, Peter and Wang, Yiqun},
  title     = {{Factored-NeuS}: Reconstructing Surfaces, Illumination, and Materials of Possibly Glossy Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21317--21327},
}
GaussianSpa: An "Optimizing-Sparsifying" Simplification Framework for Compact and High-Quality 3D Gaussian Splatting: Yangming Zhang,

Wenqi Jia,

Wei Niu,

Miao Yin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yangming and Jia, Wenqi and Niu, Wei and Yin, Miao},
  title     = {{GaussianSpa}: An ``Optimizing-Sparsifying'' Simplification Framework for Compact and High-Quality {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26673--26682},
}
Navigating the Unseen: Zero-shot Scene Graph Generation via Capsule-Based Equivariant Features: Wenhuan Huang,

Yi JI,

Guiqian Zhu,

Li Ying,

Chunping Liu; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Wenhuan and JI, Yi and Zhu, Guiqian and Ying, Li and Liu, Chunping},
  title     = {Navigating the Unseen: Zero-shot Scene Graph Generation via Capsule-Based Equivariant Features},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29448--29457},
}
VL-RewardBench: A Challenging Benchmark for Vision-Language Generative Reward Models: Lei Li,

Yuancheng Wei,

Zhihui Xie,

Xuqing Yang,

Yifan Song,

Peiyi Wang,

Chenxin An,

Tianyu Liu,

Sujian Li,

Bill Yuchen Lin,

Lingpeng Kong,

Qi Liu; [pdf]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Lei and Wei, Yuancheng and Xie, Zhihui and Yang, Xuqing and Song, Yifan and Wang, Peiyi and An, Chenxin and Liu, Tianyu and Li, Sujian and Lin, Bill Yuchen and Kong, Lingpeng and Liu, Qi},
  title     = {{VL-RewardBench}: A Challenging Benchmark for Vision-Language Generative Reward Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24657--24668},
}
ASHiTA: Automatic Scene-grounded HIerarchical Task Analysis: Yun Chang,

Leonor Fermoselle,

Duy Ta,

Bernadette Bucher,

Luca Carlone,

Jiuguang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chang_2025_CVPR,
  author    = {Chang, Yun and Fermoselle, Leonor and Ta, Duy and Bucher, Bernadette and Carlone, Luca and Wang, Jiuguang},
  title     = {{ASHiTA}: Automatic Scene-grounded HIerarchical Task Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29458--29468},
}
Discovering Fine-Grained Visual-Concept Relations by Disentangled Optimal Transport Concept Bottleneck Models: Yan Xie,

Zequn Zeng,

Hao Zhang,

Yucheng Ding,

Yi Wang,

Zhengjue Wang,

Bo Chen,

Hongwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2025_CVPR,
  author    = {Xie, Yan and Zeng, Zequn and Zhang, Hao and Ding, Yucheng and Wang, Yi and Wang, Zhengjue and Chen, Bo and Liu, Hongwei},
  title     = {Discovering Fine-Grained Visual-Concept Relations by Disentangled Optimal Transport Concept Bottleneck Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30199--30209},
}
RoomTour3D: Geometry-Aware Video-Instruction Tuning for Embodied Navigation: Mingfei Han,

Liang Ma,

Kamila Zhumakhanova,

Ekaterina Radionova,

Jingyi Zhang,

Xiaojun Chang,

Xiaodan Liang,

Ivan Laptev; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2025_CVPR,
  author    = {Han, Mingfei and Ma, Liang and Zhumakhanova, Kamila and Radionova, Ekaterina and Zhang, Jingyi and Chang, Xiaojun and Liang, Xiaodan and Laptev, Ivan},
  title     = {{RoomTour3D}: Geometry-Aware Video-Instruction Tuning for Embodied Navigation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27586--27596},
}
Bringing CLIP to the Clinic: Dynamic Soft Labels and Negation-Aware Learning for Medical Analysis: Hanbin Ko,

Chang-Min Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ko_2025_CVPR,
  author    = {Ko, Hanbin and Park, Chang-Min},
  title     = {Bringing {CLIP} to the Clinic: Dynamic Soft Labels and Negation-Aware Learning for Medical Analysis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25897--25906},
}
A Semantic Knowledge Complementarity based Decoupling Framework for Semi-supervised Class-imbalanced Medical Image Segmentation: Zheng Zhang,

Guanchun Yin,

Bo Zhang,

Wu Liu,

Xiuzhuang Zhou,

Wendong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Zheng and Yin, Guanchun and Zhang, Bo and Liu, Wu and Zhou, Xiuzhuang and Wang, Wendong},
  title     = {A Semantic Knowledge Complementarity based Decoupling Framework for Semi-supervised Class-imbalanced Medical Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25940--25949},
}
FedBiP: Heterogeneous One-Shot Federated Learning with Personalized Latent Diffusion Models: Haokun Chen,

Hang Li,

Yao Zhang,

Jinhe Bi,

Gengyuan Zhang,

Yueqi Zhang,

Philip Torr,

Jindong Gu,

Denis Krompass,

Volker Tresp; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Haokun and Li, Hang and Zhang, Yao and Bi, Jinhe and Zhang, Gengyuan and Zhang, Yueqi and Torr, Philip and Gu, Jindong and Krompass, Denis and Tresp, Volker},
  title     = {{FedBiP}: Heterogeneous One-Shot Federated Learning with Personalized Latent Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30440--30450},
}
GCE-Pose: Global Context Enhancement for Category-level Object Pose Estimation: Weihang Li,

Hongli XU,

Junwen Huang,

Hyunjun Jung,

Peter KT Yu,

Nassir Navab,

Benjamin Busam; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Weihang and XU, Hongli and Huang, Junwen and Jung, Hyunjun and Yu, Peter KT and Navab, Nassir and Busam, Benjamin},
  title     = {{GCE-Pose}: Global Context Enhancement for Category-level Object Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27154--27165},
}
Learning from Neighbors: Category Extrapolation for Long-Tail Learning: Shizhen Zhao,

Xin Wen,

Jiahui Liu,

Chuofan Ma,

Chunfeng Yuan,

Xiaojuan Qi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Shizhen and Wen, Xin and Liu, Jiahui and Ma, Chuofan and Yuan, Chunfeng and Qi, Xiaojuan},
  title     = {Learning from Neighbors: Category Extrapolation for Long-Tail Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30483--30492},
}
Material Anything: Generating Materials for Any 3D Object via Diffusion: Xin Huang,

Tengfei Wang,

Ziwei Liu,

Qing Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Xin and Wang, Tengfei and Liu, Ziwei and Wang, Qing},
  title     = {Material Anything: Generating Materials for Any {3D} Object via Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26556--26565},
}
ImagineFSL: Self-Supervised Pretraining Matters on Imagined Base Set for VLM-based Few-shot Learning: Haoyuan Yang,

Xiaoou Li,

Jiaming Lv,

Xianjun Cheng,

Qilong Wang,

Peihua Li; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Haoyuan and Li, Xiaoou and Lv, Jiaming and Cheng, Xianjun and Wang, Qilong and Li, Peihua},
  title     = {{ImagineFSL}: Self-Supervised Pretraining Matters on Imagined Base Set for {VLM}-based Few-shot Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30020--30031},
}
Continuous Locomotive Crowd Behavior Generation: Inhwan Bae,

Junoh Lee,

Hae-Gon Jeon; [pdf] [arXiv]
[bibtex]
@InProceedings{Bae_2025_CVPR,
  author    = {Bae, Inhwan and Lee, Junoh and Jeon, Hae-Gon},
  title     = {Continuous Locomotive Crowd Behavior Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22416--22431},
}
Project-Probe-Aggregate: Efficient Fine-Tuning for Group Robustness: Beier Zhu,

Jiequan Cui,

Hanwang Zhang,

Chi Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Beier and Cui, Jiequan and Zhang, Hanwang and Zhang, Chi},
  title     = {Project-Probe-Aggregate: Efficient Fine-Tuning for Group Robustness},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25487--25496},
}
Implicit Bias Injection Attacks against Text-to-Image Diffusion Models: Huayang Huang,

Xiangye Jin,

Jiaxu Miao,

Yu Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Huayang and Jin, Xiangye and Miao, Jiaxu and Wu, Yu},
  title     = {Implicit Bias Injection Attacks against Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28779--28789},
}
ROICtrl: Boosting Instance Control for Visual Generation: Yuchao Gu,

Yipin Zhou,

Yunfan Ye,

Yixin Nie,

Licheng Yu,

Pingchuan Ma,

Kevin Qinghong Lin,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2025_CVPR,
  author    = {Gu, Yuchao and Zhou, Yipin and Ye, Yunfan and Nie, Yixin and Yu, Licheng and Ma, Pingchuan and Lin, Kevin Qinghong and Shou, Mike Zheng},
  title     = {{ROICtrl}: Boosting Instance Control for Visual Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23658--23667},
}
Cropper: Vision-Language Model for Image Cropping through In-Context Learning: Seung Hyun Lee,

Jijun Jiang,

Yiran Xu,

Zhuofang Li,

Junjie Ke,

Yinxiao Li,

Junfeng He,

Steven Hickson,

Katie Datsenko,

Sangpil Kim,

Ming-Hsuan Yang,

Irfan Essa,

Feng Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Seung Hyun and Jiang, Jijun and Xu, Yiran and Li, Zhuofang and Ke, Junjie and Li, Yinxiao and He, Junfeng and Hickson, Steven and Datsenko, Katie and Kim, Sangpil and Yang, Ming-Hsuan and Essa, Irfan and Yang, Feng},
  title     = {Cropper: Vision-Language Model for Image Cropping through In-Context Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30010--30019},
}
ScaMo: Exploring the Scaling Law in Autoregressive Motion Generation Model: Shunlin Lu,

Jingbo Wang,

Zeyu Lu,

Ling-Hao Chen,

Wenxun Dai,

Junting Dong,

Zhiyang Dou,

Bo Dai,

Ruimao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Shunlin and Wang, Jingbo and Lu, Zeyu and Chen, Ling-Hao and Dai, Wenxun and Dong, Junting and Dou, Zhiyang and Dai, Bo and Zhang, Ruimao},
  title     = {{ScaMo}: Exploring the Scaling Law in Autoregressive Motion Generation Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27872--27882},
}
ICE: Intrinsic Concept Extraction from a Single Image via Diffusion Models: Fernando Julio Cendra,

Kai Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cendra_2025_CVPR,
  author    = {Cendra, Fernando Julio and Han, Kai},
  title     = {{ICE}: Intrinsic Concept Extraction from a Single Image via Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {23734--23743},
}
ASIGN: An Anatomy-aware Spatial Imputation Graphic Network for 3D Spatial Transcriptomics: Junchao Zhu,

Ruining Deng,

Tianyuan Yao,

Juming Xiong,

Chongyu Qu,

Junlin Guo,

Siqi Lu,

Mengmeng Yin,

Yu Wang,

Shilin Zhao,

Haichun Yang,

Yuankai Huo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Junchao and Deng, Ruining and Yao, Tianyuan and Xiong, Juming and Qu, Chongyu and Guo, Junlin and Lu, Siqi and Yin, Mengmeng and Wang, Yu and Zhao, Shilin and Yang, Haichun and Huo, Yuankai},
  title     = {{ASIGN}: An Anatomy-aware Spatial Imputation Graphic Network for {3D} Spatial Transcriptomics},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30829--30838},
}
MultiMorph: On-demand Atlas Construction: S. Mazdak Abulnaga,

Andrew Hoopes,

Neel Dey,

Malte Hoffmann,

Bruce Fischl,

John Guttag,

Adrian Dalca; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Abulnaga_2025_CVPR,
  author    = {Abulnaga, S. Mazdak and Hoopes, Andrew and Dey, Neel and Hoffmann, Malte and Fischl, Bruce and Guttag, John and Dalca, Adrian},
  title     = {{MultiMorph}: On-demand Atlas Construction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {30906--30917},
}
Octopus: Alleviating Hallucination via Dynamic Contrastive Decoding: Wei Suo,

Lijun Zhang,

Mengyang Sun,

Lin Yuanbo Wu,

Peng Wang,

Yanning Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Suo_2025_CVPR,
  author    = {Suo, Wei and Zhang, Lijun and Sun, Mengyang and Wu, Lin Yuanbo and Wang, Peng and Zhang, Yanning},
  title     = {Octopus: Alleviating Hallucination via Dynamic Contrastive Decoding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29904--29914},
}
Spiking Transformer: Introducing Accurate Addition-Only Spiking Self-Attention for Transformer: Yufei Guo,

Xiaode Liu,

Yuanpei Chen,

Weihang Peng,

Yuhan Zhang,

Zhe Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Yufei and Liu, Xiaode and Chen, Yuanpei and Peng, Weihang and Zhang, Yuhan and Ma, Zhe},
  title     = {Spiking Transformer: Introducing Accurate Addition-Only Spiking Self-Attention for Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24398--24408},
}
MOVIS: Enhancing Multi-Object Novel View Synthesis for Indoor Scenes: Ruijie Lu,

Yixin Chen,

Junfeng Ni,

Baoxiong Jia,

Yu Liu,

Diwen Wan,

Gang Zeng,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Ruijie and Chen, Yixin and Ni, Junfeng and Jia, Baoxiong and Liu, Yu and Wan, Diwen and Zeng, Gang and Huang, Siyuan},
  title     = {{MOVIS}: Enhancing Multi-Object Novel View Synthesis for Indoor Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {26767--26778},
}
Symbolic Representation for Any-to-Any Generative Tasks: Jiaqi Chen,

Xiaoye Zhu,

Yue Wang,

Tianyang Liu,

Xinhui Chen,

Ying Chen,

Chak Tou Leong,

Yifei Ke,

Joseph Liu,

Yiwen Yuan,

Julian McAuley,

Li-jia Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Jiaqi and Zhu, Xiaoye and Wang, Yue and Liu, Tianyang and Chen, Xinhui and Chen, Ying and Leong, Chak Tou and Ke, Yifei and Liu, Joseph and Yuan, Yiwen and McAuley, Julian and Li, Li-jia},
  title     = {Symbolic Representation for Any-to-Any Generative Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27816--27826},
}
Protecting Your Video Content: Disrupting Automated Video-based LLM Annotations: Haitong Liu,

Kuofeng Gao,

Yang Bai,

Jinmin Li,

Jinxiao Shan,

Tao Dai,

Shu-Tao Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Haitong and Gao, Kuofeng and Bai, Yang and Li, Jinmin and Shan, Jinxiao and Dai, Tao and Xia, Shu-Tao},
  title     = {Protecting Your Video Content: Disrupting Automated Video-based {LLM} Annotations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {24056--24065},
}
MedUnifier: Unifying Vision-and-Language Pre-training on Medical Data with Vision Generation Task using Discrete Visual Representations: Ziyang Zhang,

Yang Yu,

Yucheng Chen,

Xulei Yang,

Si Yong Yeo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Ziyang and Yu, Yang and Chen, Yucheng and Yang, Xulei and Yeo, Si Yong},
  title     = {{MedUnifier}: Unifying Vision-and-Language Pre-training on Medical Data with Vision Generation Task using Discrete Visual Representations},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {29744--29755},
}
ArticulatedGS: Self-supervised Digital Twin Modeling of Articulated Objects using 3D Gaussian Splatting: Junfu Guo,

Yu Xin,

Gaoyi Liu,

Kai Xu,

Ligang Liu,

Ruizhen Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Junfu and Xin, Yu and Liu, Gaoyi and Xu, Kai and Liu, Ligang and Hu, Ruizhen},
  title     = {{ArticulatedGS}: Self-supervised Digital Twin Modeling of Articulated Objects using {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27144--27153},
}
Leveraging 3D Geometric Priors in 2D Rotation Symmetry Detection: Ahyun Seo,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seo_2025_CVPR,
  author    = {Seo, Ahyun and Cho, Minsu},
  title     = {Leveraging {3D} Geometric Priors in {2D} Rotation Symmetry Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {22109--22118},
}
Noise Calibration and Spatial-Frequency Interactive Network for STEM Image Enhancement: Hesong Li,

Ziqi Wu,

Ruiwen Shao,

Tao Zhang,

Ying Fu; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Hesong and Wu, Ziqi and Shao, Ruiwen and Zhang, Tao and Fu, Ying},
  title     = {Noise Calibration and Spatial-Frequency Interactive Network for {STEM} Image Enhancement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21287--21296},
}
Homogeneous Dynamics Space for Heterogeneous Humans: Xinpeng Liu,

Junxuan Liang,

Chenshuo Zhang,

Zixuan Cai,

Cewu Lu,

Yong-Lu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xinpeng and Liang, Junxuan and Zhang, Chenshuo and Cai, Zixuan and Lu, Cewu and Li, Yong-Lu},
  title     = {Homogeneous Dynamics Space for Heterogeneous Humans},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {27782--27793},
}
TailedCore: Few-Shot Sampling for Unsupervised Long-Tail Noisy Anomaly Detection: Yoon Gyo Jung,

Jaewoo Park,

Jaeho Yoon,

Kuan-Chuan Peng,

Wonchul Kim,

Andrew Beng Jin Teoh,

Octavia Camps; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2025_CVPR,
  author    = {Jung, Yoon Gyo and Park, Jaewoo and Yoon, Jaeho and Peng, Kuan-Chuan and Kim, Wonchul and Teoh, Andrew Beng Jin and Camps, Octavia},
  title     = {{TailedCore}: Few-Shot Sampling for Unsupervised Long-Tail Noisy Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25539--25548},
}
Satellite Observations Guided Diffusion Model for Accurate Meteorological States at Arbitrary Resolution: Siwei Tu,

Ben Fei,

Weidong Yang,

Fenghua Ling,

Hao Chen,

Zili Liu,

Kun Chen,

Hang Fan,

Wanli Ouyang,

Lei Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tu_2025_CVPR,
  author    = {Tu, Siwei and Fei, Ben and Yang, Weidong and Ling, Fenghua and Chen, Hao and Liu, Zili and Chen, Kun and Fan, Hang and Ouyang, Wanli and Bai, Lei},
  title     = {Satellite Observations Guided Diffusion Model for Accurate Meteorological States at Arbitrary Resolution},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28071--28080},
}
Reconstructing People, Places, and Cameras: Lea Müller,

Hongsuk Choi,

Anthony Zhang,

Brent Yi,

Jitendra Malik,

Angjoo Kanazawa; [pdf] [supp]
[bibtex]
@InProceedings{Muller_2025_CVPR,
  author    = {M{\"u}ller, Lea and Choi, Hongsuk and Zhang, Anthony and Yi, Brent and Malik, Jitendra and Kanazawa, Angjoo},
  title     = {Reconstructing People, Places, and Cameras},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {21948--21958},
}
InPO: Inversion Preference Optimization with Reparametrized DDIM for Efficient Diffusion Model Alignment: Yunhong Lu,

Qichao Wang,

Hengyuan Cao,

Xierui Wang,

Xiaoyin Xu,

Min Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2025_CVPR,
  author    = {Lu, Yunhong and Wang, Qichao and Cao, Hengyuan and Wang, Xierui and Xu, Xiaoyin and Zhang, Min},
  title     = {{InPO}: Inversion Preference Optimization with Reparametrized {DDIM} for Efficient Diffusion Model Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {28629--28639},
}
Identifying and Mitigating Spurious Correlation in Multi-Task Learning: Junyi Chai,

Shenyu Lu,

Xiaoqian Wang; [pdf] [supp]
[bibtex]
@InProceedings{Chai_2025_CVPR,
  author    = {Chai, Junyi and Lu, Shenyu and Wang, Xiaoqian},
  title     = {Identifying and Mitigating Spurious Correlation in Multi-Task Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR)},
  month     = jun,
  year      = {2025},
  pages     = {25698--25707},
}
Immune: Improving Safety Against Jailbreaks in Multi-modal LLMs via Inference-Time Alignment: Soumya Suvra Ghosal,

Souradip Chakraborty,

Vaibhav Singh,

Tianrui Guan,

Mengdi Wang,

Ahmad Beirami,

Furong Huang,

Alvaro Velasquez,

Dinesh Manocha,

Amrit Singh Bedi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghosal_2025_CVPR,
  author    = {Ghosal, Soumya Suvra and Chakraborty, Souradip and Singh, Vaibhav and Guan, Tianrui and Wang, Mengdi and Beirami, Ahmad and Huang, Furong and Velasquez, Alvaro and Manocha, Dinesh and Bedi, Amrit Singh},
  title     = {Immune: Improving Safety Against Jailbreaks in Multi-modal {LLMs} via Inference-Time Alignment},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25038--25049}
}
CustomKD: Customizing Large Vision Foundation for Edge Model Improvement via Knowledge Distillation: Jungsoo Lee,

Debasmit Das,

Munawar Hayat,

Sungha Choi,

Kyuwoong Hwang,

Fatih Porikli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Jungsoo and Das, Debasmit and Hayat, Munawar and Choi, Sungha and Hwang, Kyuwoong and Porikli, Fatih},
  title     = {{CustomKD}: Customizing Large Vision Foundation for Edge Model Improvement via Knowledge Distillation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25176--25186}
}
PMNI: Pose-free Multi-view Normal Integration for Reflective and Textureless Surface Reconstruction: Mingzhi Pei,

Xu Cao,

Xiangyi Wang,

Heng Guo,

Zhanyu Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pei_2025_CVPR,
  author    = {Pei, Mingzhi and Cao, Xu and Wang, Xiangyi and Guo, Heng and Ma, Zhanyu},
  title     = {{PMNI}: Pose-free Multi-view Normal Integration for Reflective and Textureless Surface Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26834--26843}
}
LeanGaussian: Breaking Pixel or Point Cloud Correspondence in Modeling 3D Gaussians: Jiamin Wu,

Kenkun Liu,

Han Gao,

Xiaoke Jiang,

Yuan Yao,

Lei Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Jiamin and Liu, Kenkun and Gao, Han and Jiang, Xiaoke and Yao, Yuan and Zhang, Lei},
  title     = {{LeanGaussian}: Breaking Pixel or Point Cloud Correspondence in Modeling {3D} {Gaussians}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26641--26651}
}
Modeling Multiple Normal Action Representations for Error Detection in Procedural Tasks: Wei-Jin Huang,

Yuan-Ming Li,

Zhi-Wei Xia,

Yu-Ming Tang,

Kun-Yu Lin,

Jian-Fang Hu,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Wei-Jin and Li, Yuan-Ming and Xia, Zhi-Wei and Tang, Yu-Ming and Lin, Kun-Yu and Hu, Jian-Fang and Zheng, Wei-Shi},
  title     = {Modeling Multiple Normal Action Representations for Error Detection in Procedural Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27794--27804}
}
A Unified Latent Schrodinger Bridge Diffusion Model for Unsupervised Anomaly Detection and Localization: Shilhora Akshay,

Niveditha Lakshmi Narasimhan,

Jacob George,

Vineeth N Balasubramanian; [pdf] [supp]
[bibtex]
@InProceedings{Akshay_2025_CVPR,
  author    = {Akshay, Shilhora and Narasimhan, Niveditha Lakshmi and George, Jacob and Balasubramanian, Vineeth N},
  title     = {A Unified Latent {Schrodinger} Bridge Diffusion Model for Unsupervised Anomaly Detection and Localization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25528--25538}
}
MambaVision: A Hybrid Mamba-Transformer Vision Backbone: Ali Hatamizadeh,

Jan Kautz; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hatamizadeh_2025_CVPR,
  author    = {Hatamizadeh, Ali and Kautz, Jan},
  title     = {{MambaVision}: A Hybrid {Mamba-Transformer} Vision Backbone},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25261--25270}
}
Multi-Label Prototype Visual Spatial Search for Weakly Supervised Semantic Segmentation: Songsong Duan,

Xi Yang,

Nannan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Duan_2025_CVPR,
  author    = {Duan, Songsong and Yang, Xi and Wang, Nannan},
  title     = {Multi-Label Prototype Visual Spatial Search for Weakly Supervised Semantic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30241--30250}
}
Doppelgangers++: Improved Visual Disambiguation with Geometric 3D Features: Yuanbo Xiangli,

Ruojin Cai,

Hanyu Chen,

Jeffrey Byrne,

Noah Snavely; [pdf]
[bibtex]
@InProceedings{Xiangli_2025_CVPR,
  author    = {Xiangli, Yuanbo and Cai, Ruojin and Chen, Hanyu and Byrne, Jeffrey and Snavely, Noah},
  title     = {Doppelgangers++: Improved Visual Disambiguation with Geometric {3D} Features},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27166--27175}
}
Learnable Infinite Taylor Gaussian for Dynamic View Rendering: Bingbing Hu,

Yanyan Li,

Rui Xie,

Bo Xu,

Haoye Dong,

Junfeng Yao,

Gim Hee Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Bingbing and Li, Yanyan and Xie, Rui and Xu, Bo and Dong, Haoye and Yao, Junfeng and Lee, Gim Hee},
  title     = {Learnable Infinite {Taylor} {Gaussian} for Dynamic View Rendering},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26844--26854}
}
SaMam: Style-aware State Space Model for Arbitrary Image Style Transfer: Hongda Liu,

Longguang Wang,

Ye Zhang,

Ziru Yu,

Yulan Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Hongda and Wang, Longguang and Zhang, Ye and Yu, Ziru and Guo, Yulan},
  title     = {{SaMam}: Style-aware State Space Model for Arbitrary Image Style Transfer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28468--28478}
}
Making Old Film Great Again: Degradation-aware State Space Model for Old Film Restoration: Yudong Mao,

Hao Luo,

Zhiwei Zhong,

Peilin Chen,

Zhijiang Zhang,

Shiqi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2025_CVPR,
  author    = {Mao, Yudong and Luo, Hao and Zhong, Zhiwei and Chen, Peilin and Zhang, Zhijiang and Wang, Shiqi},
  title     = {Making Old Film Great Again: Degradation-aware State Space Model for Old Film Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28039--28049}
}
MP-SfM: Monocular Surface Priors for Robust Structure-from-Motion: Zador Pataki,

Paul-Edouard Sarlin,

Johannes L. Schönberger,

Marc Pollefeys; [pdf] [supp]
[bibtex]
@InProceedings{Pataki_2025_CVPR,
  author    = {Pataki, Zador and Sarlin, Paul-Edouard and Sch{\"o}nberger, Johannes L. and Pollefeys, Marc},
  title     = {{MP-SfM}: Monocular Surface Priors for Robust Structure-from-Motion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21891--21901}
}
Deterministic-to-Stochastic Diverse Latent Feature Mapping for Human Motion Synthesis: Yu Hua,

Weiming Liu,

Gui Xu,

Yaqing Hou,

Yew-Soon Ong,

Qiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hua_2025_CVPR,
  author    = {Hua, Yu and Liu, Weiming and Xu, Gui and Hou, Yaqing and Ong, Yew-Soon and Zhang, Qiang},
  title     = {Deterministic-to-Stochastic Diverse Latent Feature Mapping for Human Motion Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22724--22734}
}
CacheQuant: Comprehensively Accelerated Diffusion Models: Xuewen Liu,

Zhikai Li,

Qingyi Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xuewen and Li, Zhikai and Gu, Qingyi},
  title     = {{CacheQuant}: Comprehensively Accelerated Diffusion Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23269--23280}
}
Open-World Objectness Modeling Unifies Novel Object Detection: Shan Zhang,

Yao Ni,

Jinhao Du,

Yuan Xue,

Philip Torr,

Piotr Koniusz,

Anton van den Hengel; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Shan and Ni, Yao and Du, Jinhao and Xue, Yuan and Torr, Philip and Koniusz, Piotr and van den Hengel, Anton},
  title     = {Open-World Objectness Modeling Unifies Novel Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30332--30342}
}
MotionPRO: Exploring the Role of Pressure in Human MoCap and Beyond: Shenghao Ren,

Yi Lu,

Jiayi Huang,

Jiayi Zhao,

He Zhang,

Tao Yu,

Qiu Shen,

Xun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2025_CVPR,
  author    = {Ren, Shenghao and Lu, Yi and Huang, Jiayi and Zhao, Jiayi and Zhang, He and Yu, Tao and Shen, Qiu and Cao, Xun},
  title     = {{MotionPRO}: Exploring the Role of Pressure in Human {MoCap} and Beyond},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27760--27770}
}
DiffVsgg: Diffusion-Driven Online Video Scene Graph Generation: Mu Chen,

Liulei Li,

Wenguan Wang,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Mu and Li, Liulei and Wang, Wenguan and Yang, Yi},
  title     = {{DiffVsgg}: Diffusion-Driven Online Video Scene Graph Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29161--29172}
}
Towards Smart Point-and-Shoot Photography: Jiawan Li,

Fei Zhou,

Zhipeng Zhong,

Jiongzhi Lin,

Guoping Qiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jiawan and Zhou, Fei and Zhong, Zhipeng and Lin, Jiongzhi and Qiu, Guoping},
  title     = {Towards Smart Point-and-Shoot Photography},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28242--28251}
}
Prototype-Based Image Prompting for Weakly Supervised Histopathological Image Segmentation: Qingchen Tang,

Lei Fan,

Maurice Pagnucco,

Yang Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2025_CVPR,
  author    = {Tang, Qingchen and Fan, Lei and Pagnucco, Maurice and Song, Yang},
  title     = {Prototype-Based Image Prompting for Weakly Supervised Histopathological Image Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30271--30280}
}
Mitigating the Human-Robot Domain Discrepancy in Visual Pre-training for Robotic Manipulation: Jiaming Zhou,

Teli Ma,

Kun-Yu Lin,

Zifan Wang,

Ronghe Qiu,

Junwei Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Jiaming and Ma, Teli and Lin, Kun-Yu and Wang, Zifan and Qiu, Ronghe and Liang, Junwei},
  title     = {Mitigating the Human-Robot Domain Discrepancy in Visual Pre-training for Robotic Manipulation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22551--22561}
}
SpatialCLIP: Learning 3D-aware Image Representations from Spatially Discriminative Language: Zehan Wang,

Sashuai Zhou,

Shaoxuan He,

Haifeng Huang,

Lihe Yang,

Ziang Zhang,

Xize Cheng,

Shengpeng Ji,

Tao Jin,

Hengshuang Zhao,

Zhou Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Zehan and Zhou, Sashuai and He, Shaoxuan and Huang, Haifeng and Yang, Lihe and Zhang, Ziang and Cheng, Xize and Ji, Shengpeng and Jin, Tao and Zhao, Hengshuang and Zhao, Zhou},
  title     = {{SpatialCLIP}: Learning {3D-aware} Image Representations from Spatially Discriminative Language},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29656--29666}
}
Mono2Stereo: A Benchmark and Empirical Study for Stereo Conversion: Songsong Yu,

Yuxin Chen,

Zhongang Qi,

Zeke Xie,

Yifan Wang,

Lijun Wang,

Ying Shan,

Huchuan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2025_CVPR,
  author    = {Yu, Songsong and Chen, Yuxin and Qi, Zhongang and Xie, Zeke and Wang, Yifan and Wang, Lijun and Shan, Ying and Lu, Huchuan},
  title     = {{Mono2Stereo}: A Benchmark and Empirical Study for Stereo Conversion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21847--21856}
}
SoftShadow: Leveraging Soft Masks for Penumbra-Aware Shadow Removal: Xinrui Wang,

Lanqing Guo,

Xiyu Wang,

Siyu Huang,

Bihan Wen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xinrui and Guo, Lanqing and Wang, Xiyu and Huang, Siyu and Wen, Bihan},
  title     = {{SoftShadow}: Leveraging Soft Masks for Penumbra-Aware Shadow Removal},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23217--23226}
}
VTON-HandFit: Virtual Try-on for Arbitrary Hand Pose Guided by Hand Priors Embedding: Yujie Liang,

Xiaobin Hu,

Boyuan Jiang,

Donghao Luo,

Xu Peng,

Kai Wu,

Chengming Xu,

Wenhui Han,

Taisong Jin,

Chengjie Wang,

Rongrong Ji; [pdf]
[bibtex]
@InProceedings{Liang_2025_CVPR,
  author    = {Liang, Yujie and Hu, Xiaobin and Jiang, Boyuan and Luo, Donghao and Peng, Xu and Wu, Kai and Xu, Chengming and Han, Wenhui and Jin, Taisong and Wang, Chengjie and Ji, Rongrong},
  title     = {{VTON-HandFit}: Virtual Try-on for Arbitrary Hand Pose Guided by Hand Priors Embedding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22616--22626}
}
Uni-Renderer: Unifying Rendering and Inverse Rendering Via Dual Stream Diffusion: Zhifei Chen,

Tianshuo Xu,

Wenhang Ge,

Leyi Wu,

Dongyu Yan,

Jing He,

Luozhou Wang,

Lu Zeng,

Shunsi Zhang,

Ying-Cong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Zhifei and Xu, Tianshuo and Ge, Wenhang and Wu, Leyi and Yan, Dongyu and He, Jing and Wang, Luozhou and Zeng, Lu and Zhang, Shunsi and Chen, Ying-Cong},
  title     = {{Uni-Renderer}: Unifying Rendering and Inverse Rendering Via Dual Stream Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26504--26513}
}
POSTA: A Go-to Framework for Customized Artistic Poster Generation: Haoyu Chen,

Xiaojie Xu,

Wenbo Li,

Jingjing Ren,

Tian Ye,

Songhua Liu,

Ying-Cong Chen,

Lei Zhu,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Haoyu and Xu, Xiaojie and Li, Wenbo and Ren, Jingjing and Ye, Tian and Liu, Songhua and Chen, Ying-Cong and Zhu, Lei and Wang, Xinchao},
  title     = {{POSTA}: A Go-to Framework for Customized Artistic Poster Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28694--28704}
}
NSD-Imagery: A Benchmark Dataset for Extending fMRI Vision Decoding Methods to Mental Imagery: Reese Kneeland,

Paul S. Scotti,

Ghislain St-Yves,

Jesse Breedlove,

Kendrick Kay,

Thomas Naselaris; [pdf] [supp]
[bibtex]
@InProceedings{Kneeland_2025_CVPR,
  author    = {Kneeland, Reese and Scotti, Paul S. and St-Yves, Ghislain and Breedlove, Jesse and Kay, Kendrick and Naselaris, Thomas},
  title     = {{NSD-Imagery}: A Benchmark Dataset for Extending {fMRI} Vision Decoding Methods to Mental Imagery},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28852--28862}
}
VLsI: Verbalized Layers-to-Interactions from Large to Small Vision Language Models: Byung-Kwan Lee,

Ryo Hachiuma,

Yu-Chiang Frank Wang,

Yong Man Ro,

Yueh-Hua Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2025_CVPR,
  author    = {Lee, Byung-Kwan and Hachiuma, Ryo and Wang, Yu-Chiang Frank and Ro, Yong Man and Wu, Yueh-Hua},
  title     = {{VLsI}: Verbalized Layers-to-Interactions from Large to Small Vision Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29545--29557}
}
Just Dance with pi! A Poly-modal Inductor for Weakly-supervised Video Anomaly Detection: Snehashis Majhi,

Giacomo D'Amicantonio,

Antitza Dantcheva,

Quan Kong,

Lorenzo Garattoni,

Gianpiero Francesca,

Egor Bondarev,

Francois Bremond; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Majhi_2025_CVPR,
  author    = {Majhi, Snehashis and D'Amicantonio, Giacomo and Dantcheva, Antitza and Kong, Quan and Garattoni, Lorenzo and Francesca, Gianpiero and Bondarev, Egor and Bremond, Francois},
  title     = {Just Dance with pi! {A} Poly-modal Inductor for Weakly-supervised Video Anomaly Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24265--24274}
}
Efficient Motion-Aware Video MLLM: Zijia Zhao,

Yuqi Huo,

Tongtian Yue,

Longteng Guo,

Haoyu Lu,

Bingning Wang,

Weipeng Chen,

Jing Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2025_CVPR,
  author    = {Zhao, Zijia and Huo, Yuqi and Yue, Tongtian and Guo, Longteng and Lu, Haoyu and Wang, Bingning and Chen, Weipeng and Liu, Jing},
  title     = {Efficient Motion-Aware Video {MLLM}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24159--24168}
}
Zero-Shot 4D Lidar Panoptic Segmentation: Yushan Zhang,

Aljoša Ošep,

Laura Leal-Taixé,

Tim Meinhardt; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Yushan and O{\v{s}}ep, Aljo{\v{s}}a and Leal-Taix{\'e}, Laura and Meinhardt, Tim},
  title     = {Zero-Shot {4D} Lidar Panoptic Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24506--24517}
}
ADU: Adaptive Detection of Unknown Categories in Black-Box Domain Adaptation: Yushan Lai,

Guowen Li,

Haoyuan Liang,

Juepeng Zheng,

Zhiyu Ye; [pdf] [supp]
[bibtex]
@InProceedings{Lai_2025_CVPR,
  author    = {Lai, Yushan and Li, Guowen and Liang, Haoyuan and Zheng, Juepeng and Ye, Zhiyu},
  title     = {{ADU}: Adaptive Detection of Unknown Categories in Black-Box Domain Adaptation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30588--30598}
}
EmotiveTalk: Expressive Talking Head Generation through Audio Information Decoupling and Emotional Video Diffusion: Haotian Wang,

Yuzhe Weng,

Yueyan Li,

Zilu Guo,

Jun Du,

Shutong Niu,

Jiefeng Ma,

Shan He,

Xiaoyan Wu,

Qiming Hu,

Bing Yin,

Cong Liu,

Qingfeng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Haotian and Weng, Yuzhe and Li, Yueyan and Guo, Zilu and Du, Jun and Niu, Shutong and Ma, Jiefeng and He, Shan and Wu, Xiaoyan and Hu, Qiming and Yin, Bing and Liu, Cong and Liu, Qingfeng},
  title     = {{EmotiveTalk}: Expressive Talking Head Generation through Audio Information Decoupling and Emotional Video Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26212--26221}
}
Unsupervised Foundation Model-Agnostic Slide-Level Representation Learning: Tim Lenz,

Peter Neidlinger,

Marta Ligero,

Georg Wölflein,

Marko van Treeck,

Jakob N. Kather; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lenz_2025_CVPR,
  author    = {Lenz, Tim and Neidlinger, Peter and Ligero, Marta and W{\"o}lflein, Georg and van Treeck, Marko and Kather, Jakob N.},
  title     = {Unsupervised Foundation Model-Agnostic Slide-Level Representation Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30807--30817}
}
UNIALIGN: Scaling Multimodal Alignment within One Unified Model: Bo Zhou,

Liulei Li,

Yujia Wang,

Huafeng Liu,

Yazhou Yao,

Wenguan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2025_CVPR,
  author    = {Zhou, Bo and Li, Liulei and Wang, Yujia and Liu, Huafeng and Yao, Yazhou and Wang, Wenguan},
  title     = {{UNIALIGN}: Scaling Multimodal Alignment within One Unified Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29644--29655}
}
ShowHowTo: Generating Scene-Conditioned Step-by-Step Visual Instructions: Tomáš Souček,

Prajwal Gatti,

Michael Wray,

Ivan Laptev,

Dima Damen,

Josef Sivic; [pdf] [supp]
[bibtex]
@InProceedings{Soucek_2025_CVPR,
  author    = {Sou{\v{c}}ek, Tom{\'a}{\v{s}} and Gatti, Prajwal and Wray, Michael and Laptev, Ivan and Damen, Dima and Sivic, Josef},
  title     = {{ShowHowTo}: Generating Scene-Conditioned Step-by-Step Visual Instructions},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27435--27445}
}
Exploration-Driven Generative Interactive Environments: Nedko Savov,

Naser Kazemi,

Mohammad Mahdi,

Danda Pani Paudel,

Xi Wang,

Luc Van Gool; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Savov_2025_CVPR,
  author    = {Savov, Nedko and Kazemi, Naser and Mahdi, Mohammad and Paudel, Danda Pani and Wang, Xi and Van Gool, Luc},
  title     = {Exploration-Driven Generative Interactive Environments},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27597--27607}
}
DreamText: High Fidelity Scene Text Synthesis: Yibin Wang,

Weizhong Zhang,

Honghui Xu,

Cheng Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yibin and Zhang, Weizhong and Xu, Honghui and Jin, Cheng},
  title     = {{DreamText}: High Fidelity Scene Text Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28555--28563}
}
ProKeR: A Kernel Perspective on Few-Shot Adaptation of Large Vision-Language Models: Yassir Bendou,

Amine Ouasfi,

Vincent Gripon,

Adnane Boukhayma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bendou_2025_CVPR,
  author    = {Bendou, Yassir and Ouasfi, Amine and Gripon, Vincent and Boukhayma, Adnane},
  title     = {{ProKeR}: A Kernel Perspective on Few-Shot Adaptation of Large Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25092--25102}
}
MonoTAKD: Teaching Assistant Knowledge Distillation for Monocular 3D Object Detection: Hou-I Liu,

Christine Wu,

Jen-Hao Cheng,

Wenhao Chai,

Shian-Yun Wang,

Gaowen Liu,

Hugo Latapie,

Jhih-Ciang Wu,

Jenq-Neng Hwang,

Hong-Han Shuai,

Wen-Huang Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Hou-I and Wu, Christine and Cheng, Jen-Hao and Chai, Wenhao and Wang, Shian-Yun and Liu, Gaowen and Latapie, Hugo and Wu, Jhih-Ciang and Hwang, Jenq-Neng and Shuai, Hong-Han and Cheng, Wen-Huang},
  title     = {{MonoTAKD}: Teaching Assistant Knowledge Distillation for Monocular {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22266--22275}
}
Easy-editable Image Vectorization with Multi-layer Multi-scale Distributed Visual Feature Embedding: Ye Chen,

Zhangli Hu,

Zhongyin Zhao,

Yupeng Zhu,

Yue Shi,

Yuxuan Xiong,

Bingbing Ni; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Ye and Hu, Zhangli and Zhao, Zhongyin and Zhu, Yupeng and Shi, Yue and Xiong, Yuxuan and Ni, Bingbing},
  title     = {Easy-editable Image Vectorization with Multi-layer Multi-scale Distributed Visual Feature Embedding},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23345--23354}
}
Acquire and then Adapt: Squeezing out Text-to-Image Model for Image Restoration: Junyuan Deng,

Xinyi Wu,

Yongxing Yang,

Congchao Zhu,

Song Wang,

Zhenyao Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2025_CVPR,
  author    = {Deng, Junyuan and Wu, Xinyi and Yang, Yongxing and Zhu, Congchao and Wang, Song and Wu, Zhenyao},
  title     = {Acquire and then Adapt: Squeezing out Text-to-Image Model for Image Restoration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23195--23206}
}
Devils in Middle Layers of Large Vision-Language Models: Interpreting, Detecting and Mitigating Object Hallucinations via Attention Lens: Zhangqi Jiang,

Junkai Chen,

Beier Zhu,

Tingjin Luo,

Yankun Shen,

Xu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Zhangqi and Chen, Junkai and Zhu, Beier and Luo, Tingjin and Shen, Yankun and Yang, Xu},
  title     = {Devils in Middle Layers of Large Vision-Language Models: Interpreting, Detecting and Mitigating Object Hallucinations via Attention Lens},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25004--25014}
}
SpectroMotion: Dynamic 3D Reconstruction of Specular Scenes: Cheng-De Fan,

Chen-Wei Chang,

Yi-Ruei Liu,

Jie-Ying Lee,

Jiun-Long Huang,

Yu-Chee Tseng,

Yu-Lun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2025_CVPR,
  author    = {Fan, Cheng-De and Chang, Chen-Wei and Liu, Yi-Ruei and Lee, Jie-Ying and Huang, Jiun-Long and Tseng, Yu-Chee and Liu, Yu-Lun},
  title     = {{SpectroMotion}: Dynamic {3D} Reconstruction of Specular Scenes},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21328--21338}
}
VTON 360: High-Fidelity Virtual Try-On from Any Viewing Direction: Zijian He,

Yuwei Ning,

Yipeng Qin,

Guangrun Wang,

Sibei Yang,

Liang Lin,

Guanbin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Zijian and Ning, Yuwei and Qin, Yipeng and Wang, Guangrun and Yang, Sibei and Lin, Liang and Li, Guanbin},
  title     = {{VTON} 360: High-Fidelity Virtual Try-On from Any Viewing Direction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26388--26398}
}
MVBoost: Boost 3D Reconstruction with Multi-View Refinement: Xiangyu Liu,

Xiaomei Zhang,

Zhiyuan Ma,

Xiangyu Zhu,

Zhen Lei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Xiangyu and Zhang, Xiaomei and Ma, Zhiyuan and Zhu, Xiangyu and Lei, Zhen},
  title     = {{MVBoost}: Boost {3D} Reconstruction with Multi-View Refinement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21664--21673}
}
Category-Agnostic Neural Object Rigging: Guangzhao He,

Chen Geng,

Shangzhe Wu,

Jiajun Wu; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2025_CVPR,
  author    = {He, Guangzhao and Geng, Chen and Wu, Shangzhe and Wu, Jiajun},
  title     = {Category-Agnostic Neural Object Rigging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22078--22088}
}
POPEN: Preference-Based Optimization and Ensemble for LVLM-Based Reasoning Segmentation: Lanyun Zhu,

Tianrun Chen,

Qianxiong Xu,

Xuanyi Liu,

Deyi Ji,

Haiyang Wu,

De Wen Soh,

Jun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Lanyun and Chen, Tianrun and Xu, Qianxiong and Liu, Xuanyi and Ji, Deyi and Wu, Haiyang and Soh, De Wen and Liu, Jun},
  title     = {{POPEN}: Preference-Based Optimization and Ensemble for {LVLM-Based} Reasoning Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30231--30240}
}
MMAudio: Taming Multimodal Joint Training for High-Quality Video-to-Audio Synthesis: Ho Kei Cheng,

Masato Ishii,

Akio Hayakawa,

Takashi Shibuya,

Alexander Schwing,

Yuki Mitsufuji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2025_CVPR,
  author    = {Cheng, Ho Kei and Ishii, Masato and Hayakawa, Akio and Shibuya, Takashi and Schwing, Alexander and Mitsufuji, Yuki},
  title     = {{MMAudio}: Taming Multimodal Joint Training for High-Quality Video-to-Audio Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28901--28911}
}
Mimic In-Context Learning for Multimodal Tasks: Yuchu Jiang,

Jiale Fu,

Chenduo Hao,

Xinting Hu,

Yingzhe Peng,

Xin Geng,

Xu Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2025_CVPR,
  author    = {Jiang, Yuchu and Fu, Jiale and Hao, Chenduo and Hu, Xinting and Peng, Yingzhe and Geng, Xin and Yang, Xu},
  title     = {Mimic In-Context Learning for Multimodal Tasks},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29825--29835}
}
Vision-Language Models Do Not Understand Negation: Kumail Alhamoud,

Shaden Alshammari,

Yonglong Tian,

Guohao Li,

Philip H.S. Torr,

Yoon Kim,

Marzyeh Ghassemi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alhamoud_2025_CVPR,
  author    = {Alhamoud, Kumail and Alshammari, Shaden and Tian, Yonglong and Li, Guohao and Torr, Philip H. S. and Kim, Yoon and Ghassemi, Marzyeh},
  title     = {Vision-Language Models Do Not Understand Negation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29612--29622}
}
NexusGS: Sparse View Synthesis with Epipolar Depth Priors in 3D Gaussian Splatting: Yulong Zheng,

Zicheng Jiang,

Shengfeng He,

Yandu Sun,

Junyu Dong,

Huaidong Zhang,

Yong Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2025_CVPR,
  author    = {Zheng, Yulong and Jiang, Zicheng and He, Shengfeng and Sun, Yandu and Dong, Junyu and Zhang, Huaidong and Du, Yong},
  title     = {{NexusGS}: Sparse View Synthesis with Epipolar Depth Priors in {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26800--26809}
}
HyperNet Fields: Efficiently Training Hypernetworks without Ground Truth by Learning Weight Trajectories: Eric Hedlin,

Munawar Hayat,

Fatih Porikli,

Kwang Moo Yi,

Shweta Mahajan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hedlin_2025_CVPR,
  author    = {Hedlin, Eric and Hayat, Munawar and Porikli, Fatih and Yi, Kwang Moo and Mahajan, Shweta},
  title     = {{HyperNet} Fields: Efficiently Training Hypernetworks without Ground Truth by Learning Weight Trajectories},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22129--22138}
}
RICCARDO: Radar Hit Prediction and Convolution for Camera-Radar 3D Object Detection: Yunfei Long,

Abhinav Kumar,

Xiaoming Liu,

Daniel Morris; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Long_2025_CVPR,
  author    = {Long, Yunfei and Kumar, Abhinav and Liu, Xiaoming and Morris, Daniel},
  title     = {{RICCARDO}: Radar Hit Prediction and Convolution for Camera-Radar {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22276--22285}
}
BLADE: Single-view Body Mesh Estimation through Accurate Depth Estimation: Shengze Wang,

Jiefeng Li,

Tianye Li,

Ye Yuan,

Henry Fuchs,

Koki Nagano,

Shalini De Mello,

Michael Stengel; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shengze and Li, Jiefeng and Li, Tianye and Yuan, Ye and Fuchs, Henry and Nagano, Koki and De Mello, Shalini and Stengel, Michael},
  title     = {{BLADE}: Single-view Body Mesh Estimation through Accurate Depth Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21991--22000}
}
MoEE: Mixture of Emotion Experts for Audio-Driven Portrait Animation: Huaize Liu,

Wenzhang Sun,

Donglin Di,

Shibo Sun,

Jiahui Yang,

Changqing Zou,

Hujun Bao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Huaize and Sun, Wenzhang and Di, Donglin and Sun, Shibo and Yang, Jiahui and Zou, Changqing and Bao, Hujun},
  title     = {{MoEE}: Mixture of Emotion Experts for Audio-Driven Portrait Animation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26222--26231}
}
ReCap: Better Gaussian Relighting with Cross-Environment Captures: Jingzhi Li,

Zongwei Wu,

Eduard Zamfir,

Radu Timofte; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Jingzhi and Wu, Zongwei and Zamfir, Eduard and Timofte, Radu},
  title     = {{ReCap}: Better {Gaussian} Relighting with Cross-Environment Captures},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21307--21316}
}
Vision-Language Embodiment for Monocular Depth Estimation: Jinchang Zhang,

Guoyu Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Jinchang and Lu, Guoyu},
  title     = {Vision-Language Embodiment for Monocular Depth Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29479--29489}
}
Frequency Dynamic Convolution for Dense Image Prediction: Linwei Chen,

Lin Gu,

Liang Li,

Chenggang Yan,

Ying Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Linwei and Gu, Lin and Li, Liang and Yan, Chenggang and Fu, Ying},
  title     = {Frequency Dynamic Convolution for Dense Image Prediction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30178--30188}
}
IDEA: Inverted Text with Cooperative Deformable Aggregation for Multi-modal Object Re-Identification: Yuhao Wang,

Yongfeng Lv,

Pingping Zhang,

Huchuan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yuhao and Lv, Yongfeng and Zhang, Pingping and Lu, Huchuan},
  title     = {{IDEA}: Inverted Text with Cooperative Deformable Aggregation for Multi-modal Object Re-Identification},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29701--29710}
}
Consistency Posterior Sampling for Diverse Image Synthesis: Vishal Purohit,

Matthew Repasky,

Jianfeng Lu,

Qiang Qiu,

Yao Xie,

Xiuyuan Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Purohit_2025_CVPR,
  author    = {Purohit, Vishal and Repasky, Matthew and Lu, Jianfeng and Qiu, Qiang and Xie, Yao and Cheng, Xiuyuan},
  title     = {Consistency Posterior Sampling for Diverse Image Synthesis},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28327--28336}
}
IMFine: 3D Inpainting via Geometry-guided Multi-view Refinement: Zhihao Shi,

Dong Huo,

Yuhongze Zhou,

Yan Min,

Juwei Lu,

Xinxin Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2025_CVPR,
  author    = {Shi, Zhihao and Huo, Dong and Zhou, Yuhongze and Min, Yan and Lu, Juwei and Zuo, Xinxin},
  title     = {{IMFine}: {3D} Inpainting via Geometry-guided Multi-view Refinement},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26694--26703}
}
DeepCompress-ViT: Rethinking Model Compression to Enhance Efficiency of Vision Transformers at the Edge: Sabbir Ahmed,

Abdullah Al Arafat,

Deniz Najafi,

Akhlak Mahmood,

Mamshad Nayeem Rizve,

Mohaiminul Al Nahian,

Ranyang Zhou,

Shaahin Angizi,

Adnan Siraj Rakin; [pdf]
[bibtex]
@InProceedings{Ahmed_2025_CVPR,
  author    = {Ahmed, Sabbir and Al Arafat, Abdullah and Najafi, Deniz and Mahmood, Akhlak and Rizve, Mamshad Nayeem and Al Nahian, Mohaiminul and Zhou, Ranyang and Angizi, Shaahin and Rakin, Adnan Siraj},
  title     = {{DeepCompress-ViT}: Rethinking Model Compression to Enhance Efficiency of Vision Transformers at the Edge},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30147--30156}
}
EvOcc: Accurate Semantic Occupancy for Automated Driving Using Evidence Theory: Jonas Kälble,

Sascha Wirges,

Maxim Tatarchenko,

Eddy Ilg; [pdf] [supp]
[bibtex]
@InProceedings{Kalble_2025_CVPR,
  author    = {K{\"a}lble, Jonas and Wirges, Sascha and Tatarchenko, Maxim and Ilg, Eddy},
  title     = {{EvOcc}: Accurate Semantic Occupancy for Automated Driving Using Evidence Theory},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27467--27476}
}
Towards Continual Universal Segmentation: Zihan Lin,

Zilei Wang,

Xu Wang; [pdf]
[bibtex]
@InProceedings{Lin_2025_CVPR,
  author    = {Lin, Zihan and Wang, Zilei and Wang, Xu},
  title     = {Towards Continual Universal Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29417--29427}
}
PGC: Physics-Based Gaussian Cloth from a Single Pose: Michelle Guo,

Matt Jen-Yuan Chiang,

Igor Santesteban,

Nikolaos Sarafianos,

Hsiao-yu Chen,

Oshri Halimi,

Aljaž Božič,

Shunsuke Saito,

Jiajun Wu,

C. Karen Liu,

Tuur Stuyck,

Egor Larionov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2025_CVPR,
  author    = {Guo, Michelle and Chiang, Matt Jen-Yuan and Santesteban, Igor and Sarafianos, Nikolaos and Chen, Hsiao-yu and Halimi, Oshri and Bo{\v{z}}i{\v{c}}, Alja{\v{z}} and Saito, Shunsuke and Wu, Jiajun and Liu, C. Karen and Stuyck, Tuur and Larionov, Egor},
  title     = {{PGC}: Physics-Based {Gaussian} Cloth from a Single Pose},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21215--21225}
}
OFER: Occluded Face Expression Reconstruction: Pratheba Selvaraju,

Victoria Fernandez Abrevaya,

Timo Bolkart,

Rick Akkerman,

Tianyu Ding,

Faezeh Amjadi,

Ilya Zharkov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Selvaraju_2025_CVPR,
  author    = {Selvaraju, Pratheba and Abrevaya, Victoria Fernandez and Bolkart, Timo and Akkerman, Rick and Ding, Tianyu and Amjadi, Faezeh and Zharkov, Ilya},
  title     = {{OFER}: Occluded Face Expression Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26985--26995}
}
Cubify Anything: Scaling Indoor 3D Object Detection: Justin Lazarow,

David Griffiths,

Gefen Kohavi,

Francisco Crespo,

Afshin Dehghan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lazarow_2025_CVPR,
  author    = {Lazarow, Justin and Griffiths, David and Kohavi, Gefen and Crespo, Francisco and Dehghan, Afshin},
  title     = {Cubify Anything: Scaling Indoor {3D} Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22225--22233}
}
DEAL: Data-Efficient Adversarial Learning for High-Quality Infrared Imaging: Zhu Liu,

Zijun Wang,

Jinyuan Liu,

Fanqi Meng,

Long Ma,

Risheng Liu; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Zhu and Wang, Zijun and Liu, Jinyuan and Meng, Fanqi and Ma, Long and Liu, Risheng},
  title     = {{DEAL}: Data-Efficient Adversarial Learning for High-Quality Infrared Imaging},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28198--28207}
}
BimArt: A Unified Approach for the Synthesis of 3D Bimanual Interaction with Articulated Objects: Wanyue Zhang,

Rishabh Dabral,

Vladislav Golyanik,

Vasileios Choutas,

Eduardo Alvarado,

Thabo Beeler,

Marc Habermann,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Wanyue and Dabral, Rishabh and Golyanik, Vladislav and Choutas, Vasileios and Alvarado, Eduardo and Beeler, Thabo and Habermann, Marc and Theobalt, Christian},
  title     = {{BimArt}: A Unified Approach for the Synthesis of {3D} Bimanual Interaction with Articulated Objects},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27694--27705}
}
CoSpace: Benchmarking Continuous Space Perception Ability for Vision-Language Models: Yiqi Zhu,

Ziyue Wang,

Can Zhang,

Peng Li,

Yang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2025_CVPR,
  author    = {Zhu, Yiqi and Wang, Ziyue and Zhang, Can and Li, Peng and Liu, Yang},
  title     = {{CoSpace}: Benchmarking Continuous Space Perception Ability for Vision-Language Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29569--29579}
}
FreeTimeGS: Free Gaussian Primitives at Anytime Anywhere for Dynamic Scene Reconstruction: Yifan Wang,

Peishan Yang,

Zhen Xu,

Jiaming Sun,

Zhanhua Zhang,

Yong Chen,

Hujun Bao,

Sida Peng,

Xiaowei Zhou; [pdf]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Yifan and Yang, Peishan and Xu, Zhen and Sun, Jiaming and Zhang, Zhanhua and Chen, Yong and Bao, Hujun and Peng, Sida and Zhou, Xiaowei},
  title     = {{FreeTimeGS}: Free {Gaussian} Primitives at Anytime Anywhere for Dynamic Scene Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21750--21760}
}
Show and Tell: Visually Explainable Deep Neural Nets via Spatially-Aware Concept Bottleneck Models: Itay Benou,

Tammy Riklin Raviv; [pdf] [supp]
[bibtex]
@InProceedings{Benou_2025_CVPR,
  author    = {Benou, Itay and Raviv, Tammy Riklin},
  title     = {Show and Tell: Visually Explainable Deep Neural Nets via Spatially-Aware Concept Bottleneck Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30063--30072}
}
Learning 4D Panoptic Scene Graph Generation from Rich 2D Visual Scene: Shengqiong Wu,

Hao Fei,

Jingkang Yang,

Xiangtai Li,

Juncheng Li,

Hanwang Zhang,

Tat-seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2025_CVPR,
  author    = {Wu, Shengqiong and Fei, Hao and Yang, Jingkang and Li, Xiangtai and Li, Juncheng and Zhang, Hanwang and Chua, Tat-seng},
  title     = {Learning {4D} Panoptic Scene Graph Generation from Rich {2D} Visual Scene},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24539--24549}
}
Knowledge Bridger: Towards Training-Free Missing Modality Completion: Guanzhou Ke,

Shengfeng He,

Xiaoli Wang,

Bo Wang,

Guoqing Chao,

Yuanyang Zhang,

Yi Xie,

Hexing Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2025_CVPR,
  author    = {Ke, Guanzhou and He, Shengfeng and Wang, Xiaoli and Wang, Bo and Chao, Guoqing and Zhang, Yuanyang and Xie, Yi and Su, Hexing},
  title     = {Knowledge {Bridger}: Towards Training-Free Missing Modality Completion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25864--25873}
}
TexGarment: Consistent Garment UV Texture Generation via Efficient 3D Structure-Guided Diffusion Transformer: Jialun Liu,

Jinbo Wu,

Xiaobo Gao,

Jiakui Hu,

Bojun Xiong,

Xing Liu,

Chen Zhao,

Hongbin Pei,

Haocheng Feng,

Yingying Li,

Errui Ding,

Jingdong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Jialun and Wu, Jinbo and Gao, Xiaobo and Hu, Jiakui and Xiong, Bojun and Liu, Xing and Zhao, Chen and Pei, Hongbin and Feng, Haocheng and Li, Yingying and Ding, Errui and Wang, Jingdong},
  title     = {{TexGarment}: Consistent Garment {UV} Texture Generation via Efficient {3D} Structure-Guided Diffusion Transformer},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26566--26575}
}
Semi-Supervised State-Space Model with Dynamic Stacking Filter for Real-World Video Deraining: Shangquan Sun,

Wenqi Ren,

Juxiang Zhou,

Shu Wang,

Jianhou Gan,

Xiaochun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2025_CVPR,
  author    = {Sun, Shangquan and Ren, Wenqi and Zhou, Juxiang and Wang, Shu and Gan, Jianhou and Cao, Xiaochun},
  title     = {Semi-Supervised State-Space Model with Dynamic Stacking Filter for Real-World Video Deraining},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26114--26124}
}
TIDE: Training Locally Interpretable Domain Generalization Models Enables Test-time Correction: Aishwarya Agarwal,

Srikrishna Karanam,

Vineet Gandhi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Agarwal_2025_CVPR,
  author    = {Agarwal, Aishwarya and Karanam, Srikrishna and Gandhi, Vineet},
  title     = {{TIDE}: Training Locally Interpretable Domain Generalization Models Enables Test-time Correction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30210--30220}
}
VSNet: Focusing on the Linguistic Characteristics of Sign Language: Yuhao Li,

Xinyue Chen,

Hongkai Li,

Xiaorong Pu,

Peng Jin,

Yazhou Ren; [pdf] [supp]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Yuhao and Chen, Xinyue and Li, Hongkai and Pu, Xiaorong and Jin, Peng and Ren, Yazhou},
  title     = {{VSNet}: Focusing on the Linguistic Characteristics of Sign Language},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24320--24330}
}
Learning to Sample Effective and Diverse Prompts for Text-to-Image Generation: Taeyoung Yun,

Dinghuai Zhang,

Jinkyoo Park,

Ling Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yun_2025_CVPR,
  author    = {Yun, Taeyoung and Zhang, Dinghuai and Park, Jinkyoo and Pan, Ling},
  title     = {Learning to Sample Effective and Diverse Prompts for Text-to-Image Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23625--23635}
}
Multi-modal Medical Diagnosis via Large-small Model Collaboration: Wanyi Chen,

Zihua Zhao,

Jiangchao Yao,

Ya Zhang,

Jiajun Bu,

Haishuai Wang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2025_CVPR,
  author    = {Chen, Wanyi and Zhao, Zihua and Yao, Jiangchao and Zhang, Ya and Bu, Jiajun and Wang, Haishuai},
  title     = {Multi-modal Medical Diagnosis via Large-small Model Collaboration},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30763--30773}
}
Image Referenced Sketch Colorization Based on Animation Creation Workflow: Dingkun Yan,

Xinrui Wang,

Zhuoru Li,

Suguru Saito,

Yusuke Iwasawa,

Yutaka Matsuo,

Jiaxian Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2025_CVPR,
  author    = {Yan, Dingkun and Wang, Xinrui and Li, Zhuoru and Saito, Suguru and Iwasawa, Yusuke and Matsuo, Yutaka and Guo, Jiaxian},
  title     = {Image Referenced Sketch Colorization Based on Animation Creation Workflow},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23391--23400}
}
GaPT-DAR: Category-level Garments Pose Tracking via Integrated 2D Deformation and 3D Reconstruction: Li Zhang,

Mingliang Xu,

Jianan Wang,

Qiaojun Yu,

Lixin Yang,

Yonglu Li,

Cewu Lu,

Rujing Wang,

Liu Liu; [pdf]
[bibtex]
@InProceedings{Zhang_2025_CVPR,
  author    = {Zhang, Li and Xu, Mingliang and Wang, Jianan and Yu, Qiaojun and Yang, Lixin and Li, Yonglu and Lu, Cewu and Wang, Rujing and Liu, Liu},
  title     = {{GaPT-DAR}: Category-level Garments Pose Tracking via Integrated {2D} Deformation and {3D} Reconstruction},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22638--22647}
}
ProbPose: A Probabilistic Approach to 2D Human Pose Estimation: Miroslav Purkrabek,

Jiri Matas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Purkrabek_2025_CVPR,
  author    = {Purkrabek, Miroslav and Matas, Jiri},
  title     = {{ProbPose}: A Probabilistic Approach to {2D} Human Pose Estimation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {27124--27133}
}
MIDI: Multi-Instance Diffusion for Single Image to 3D Scene Generation: Zehuan Huang,

Yuan-Chen Guo,

Xingqiao An,

Yunhan Yang,

Yangguang Li,

Zi-Xin Zou,

Ding Liang,

Xihui Liu,

Yan-Pei Cao,

Lu Sheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zehuan and Guo, Yuan-Chen and An, Xingqiao and Yang, Yunhan and Li, Yangguang and Zou, Zi-Xin and Liang, Ding and Liu, Xihui and Cao, Yan-Pei and Sheng, Lu},
  title     = {{MIDI}: Multi-Instance Diffusion for Single Image to {3D} Scene Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23646--23657}
}
ABC-Former: Auxiliary Bimodal Cross-domain Transformer with Interactive Channel Attention for White Balance: Yu-Cheng Chiu,

Guan-Rong Chen,

Zihao Chen,

Yan-Tsung Peng; [pdf] [supp]
[bibtex]
@InProceedings{Chiu_2025_CVPR,
  author    = {Chiu, Yu-Cheng and Chen, Guan-Rong and Chen, Zihao and Peng, Yan-Tsung},
  title     = {{ABC-Former}: Auxiliary Bimodal Cross-domain Transformer with Interactive Channel Attention for White Balance},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21258--21266}
}
Fingerprinting Denoising Diffusion Probabilistic Models: Huan Teng,

Yuhui Quan,

Chengyu Wang,

Jun Huang,

Hui Ji; [pdf] [supp]
[bibtex]
@InProceedings{Teng_2025_CVPR,
  author    = {Teng, Huan and Quan, Yuhui and Wang, Chengyu and Huang, Jun and Ji, Hui},
  title     = {Fingerprinting Denoising Diffusion Probabilistic Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {28811--28820}
}
NightAdapter: Learning a Frequency Adapter for Generalizable Night-time Scene Segmentation: Qi Bi,

Jingjun Yi,

Huimin Huang,

Hao Zheng,

Haolan Zhan,

Yawen Huang,

Yuexiang Li,

Xian Wu,

Yefeng Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Bi_2025_CVPR,
  author    = {Bi, Qi and Yi, Jingjun and Huang, Huimin and Zheng, Hao and Zhan, Haolan and Huang, Yawen and Li, Yuexiang and Wu, Xian and Zheng, Yefeng},
  title     = {{NightAdapter}: Learning a Frequency Adapter for Generalizable Night-time Scene Segmentation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23838--23849}
}
UMFN: Unified Multi-Domain Face Normalization for Joint Cross-domain Prototype Learning and Heterogeneous Face Recognition: Meng Pang,

Wenjun Zhang,

Nanrun Zhou,

Shengbo Chen,

Hong Rao; [pdf]
[bibtex]
@InProceedings{Pang_2025_CVPR,
  author    = {Pang, Meng and Zhang, Wenjun and Zhou, Nanrun and Chen, Shengbo and Rao, Hong},
  title     = {{UMFN}: Unified Multi-Domain Face Normalization for Joint Cross-domain Prototype Learning and Heterogeneous Face Recognition},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29299--29308}
}
LUCAS: Layered Universal Codec Avatars: Di Liu,

Teng Deng,

Giljoo Nam,

Yu Rong,

Stanislav Pidhorskyi,

Junxuan Li,

Jason Saragih,

Dimitris N. Metaxas,

Chen Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2025_CVPR,
  author    = {Liu, Di and Deng, Teng and Nam, Giljoo and Rong, Yu and Pidhorskyi, Stanislav and Li, Junxuan and Saragih, Jason and Metaxas, Dimitris N. and Cao, Chen},
  title     = {{LUCAS}: Layered Universal Codec Avatars},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21127--21137}
}
D^3: Scaling Up Deepfake Detection by Learning from Discrepancy: Yongqi Yang,

Zhihao Qian,

Ye Zhu,

Olga Russakovsky,

Yu Wu; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Yongqi and Qian, Zhihao and Zhu, Ye and Russakovsky, Olga and Wu, Yu},
  title     = {{D$^3$}: Scaling Up Deepfake Detection by Learning from Discrepancy},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23850--23859}
}
Jailbreaking the Non-Transferable Barrier via Test-Time Data Disguising: Yongli Xiang,

Ziming Hong,

Lina Yao,

Dadong Wang,

Tongliang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2025_CVPR,
  author    = {Xiang, Yongli and Hong, Ziming and Yao, Lina and Wang, Dadong and Liu, Tongliang},
  title     = {Jailbreaking the Non-Transferable Barrier via Test-Time Data Disguising},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30671--30681}
}
3D-GRAND: A Million-Scale Dataset for 3D-LLMs with Better Grounding and Less Hallucination: Jianing Yang,

Xuweiyi Chen,

Nikhil Madaan,

Madhavan Iyengar,

Shengyi Qian,

David F. Fouhey,

Joyce Chai; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2025_CVPR,
  author    = {Yang, Jianing and Chen, Xuweiyi and Madaan, Nikhil and Iyengar, Madhavan and Qian, Shengyi and Fouhey, David F. and Chai, Joyce},
  title     = {{3D-GRAND}: A Million-Scale Dataset for {3D-LLMs} with Better Grounding and Less Hallucination},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29501--29512}
}
Generative Zero-Shot Composed Image Retrieval: Lan Wang,

Wei Ao,

Vishnu Naresh Boddeti,

Ser-Nam Lim; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Lan and Ao, Wei and Boddeti, Vishnu Naresh and Lim, Ser-Nam},
  title     = {Generative Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {29690--29700}
}
Towards Better Alignment: Training Diffusion Models with Reinforcement Learning Against Sparse Rewards: Zijing Hu,

Fengda Zhang,

Long Chen,

Kun Kuang,

Jiahui Li,

Kaifeng Gao,

Jun Xiao,

Xin Wang,

Wenwu Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2025_CVPR,
  author    = {Hu, Zijing and Zhang, Fengda and Chen, Long and Kuang, Kun and Li, Jiahui and Gao, Kaifeng and Xiao, Jun and Wang, Xin and Zhu, Wenwu},
  title     = {Towards Better Alignment: Training Diffusion Models with Reinforcement Learning Against Sparse Rewards},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {23604--23614}
}
Spatial457: A Diagnostic Benchmark for 6D Spatial Reasoning of Large Multimodal Models: Xingrui Wang,

Wufei Ma,

Tiezheng Zhang,

Celso M de Melo,

Jieneng Chen,

Alan Yuille; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Xingrui and Ma, Wufei and Zhang, Tiezheng and de Melo, Celso M and Chen, Jieneng and Yuille, Alan},
  title     = {{Spatial457}: A Diagnostic Benchmark for {6D} Spatial Reasoning of Large Multimodal Models},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24669--24679}
}
Omnidirectional Multi-Object Tracking: Kai Luo,

Hao Shi,

Sheng Wu,

Fei Teng,

Mengfei Duan,

Chang Huang,

Yuhang Wang,

Kaiwei Wang,

Kailun Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2025_CVPR,
  author    = {Luo, Kai and Shi, Hao and Wu, Sheng and Teng, Fei and Duan, Mengfei and Huang, Chang and Wang, Yuhang and Wang, Kaiwei and Yang, Kailun},
  title     = {Omnidirectional Multi-Object Tracking},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {21959--21969}
}
Potential Field Based Deep Metric Learning: Shubhang Bhatnagar,

Narendra Ahuja; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bhatnagar_2025_CVPR,
  author    = {Bhatnagar, Shubhang and Ahuja, Narendra},
  title     = {Potential Field Based Deep Metric Learning},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25549--25559}
}
Enhancing Vision-Language Compositional Understanding with Multimodal Synthetic Data: Haoxin Li,

Boyang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2025_CVPR,
  author    = {Li, Haoxin and Li, Boyang},
  title     = {Enhancing Vision-Language Compositional Understanding with Multimodal Synthetic Data},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24849--24861}
}
Directional Label Diffusion Model for Learning from Noisy Labels: Senyu Hou,

Gaoxia Jiang,

Jia Zhang,

Shangrong Yang,

Husheng Guo,

Yaqing Guo,

Wenjian Wang; [pdf] [supp]
[bibtex]
@InProceedings{Hou_2025_CVPR,
  author    = {Hou, Senyu and Jiang, Gaoxia and Zhang, Jia and Yang, Shangrong and Guo, Husheng and Guo, Yaqing and Wang, Wenjian},
  title     = {Directional Label Diffusion Model for Learning from Noisy Labels},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25738--25748}
}
Learning Endogenous Attention for Incremental Object Detection: Xiang Song,

Yuhang He,

Jingyuan Li,

Qiang Wang,

Yihong Gong; [pdf] [supp]
[bibtex]
@InProceedings{Song_2025_CVPR,
  author    = {Song, Xiang and He, Yuhang and Li, Jingyuan and Wang, Qiang and Gong, Yihong},
  title     = {Learning Endogenous Attention for Incremental Object Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {30354--30364}
}
StarGen: A Spatiotemporal Autoregression Framework with Video Diffusion Model for Scalable and Controllable Scene Generation: Shangjin Zhai,

Zhichao Ye,

Jialin Liu,

Weijian Xie,

Jiaqi Hu,

Zhen Peng,

Hua Xue,

Danpeng Chen,

Xiaomeng Wang,

Lei Yang,

Nan Wang,

Haomin Liu,

Guofeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhai_2025_CVPR,
  author    = {Zhai, Shangjin and Ye, Zhichao and Liu, Jialin and Xie, Weijian and Hu, Jiaqi and Peng, Zhen and Xue, Hua and Chen, Danpeng and Wang, Xiaomeng and Yang, Lei and Wang, Nan and Liu, Haomin and Zhang, Guofeng},
  title     = {{StarGen}: A Spatiotemporal Autoregression Framework with Video Diffusion Model for Scalable and Controllable Scene Generation},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {26822--26833}
}
HomoGen: Enhanced Video Inpainting via Homography Propagation and Diffusion: Ding Ding,

Yueming Pan,

Ruoyu Feng,

Qi Dai,

Kai Qiu,

Jianmin Bao,

Chong Luo,

Zhenzhong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Ding_2025_CVPR,
  author    = {Ding, Ding and Pan, Yueming and Feng, Ruoyu and Dai, Qi and Qiu, Kai and Bao, Jianmin and Luo, Chong and Chen, Zhenzhong},
  title     = {{HomoGen}: Enhanced Video Inpainting via Homography Propagation and Diffusion},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {22953--22962}
}
Do ImageNet-trained Models Learn Shortcuts? The Impact of Frequency Shortcuts on Generalization: Shunxin Wang,

Raymond Veldhuis,

Nicola Strisciuglio; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2025_CVPR,
  author    = {Wang, Shunxin and Veldhuis, Raymond and Strisciuglio, Nicola},
  title     = {Do {ImageNet}-trained Models Learn Shortcuts? {The} Impact of Frequency Shortcuts on Generalization},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25198--25207}
}
HORP: Human-Object Relation Priors Guided HOI Detection: Pei Geng,

Jian Yang,

Shanshan Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Geng_2025_CVPR,
  author    = {Geng, Pei and Yang, Jian and Zhang, Shanshan},
  title     = {{HORP}: Human-Object Relation Priors Guided {HOI} Detection},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {25325--25335}
}
Building a Mind Palace: Structuring Environment-Grounded Semantic Graphs for Effective Long Video Analysis with LLMs: Zeyi Huang,

Yuyang Ji,

Xiaofang Wang,

Nikhil Mehta,

Tong Xiao,

Donghyun Lee,

Sigmund Vanvalkenburgh,

Shengxin Zha,

Bolin Lai,

Licheng Yu,

Ning Zhang,

Yong Jae Lee,

Miao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2025_CVPR,
  author    = {Huang, Zeyi and Ji, Yuyang and Wang, Xiaofang and Mehta, Nikhil and Xiao, Tong and Lee, Donghyun and Vanvalkenburgh, Sigmund and Zha, Shengxin and Lai, Bolin and Yu, Licheng and Zhang, Ning and Lee, Yong Jae and Liu, Miao},
  title     = {Building a Mind Palace: Structuring Environment-Grounded Semantic Graphs for Effective Long Video Analysis with {LLMs}},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference ({CVPR})},
  month     = jun,
  year      = {2025},
  pages     = {24169--24179}
}; Back